Changeset 249

Show
Ignore:
Timestamp:
08/25/05 01:26:37 (3 years ago)
Author:
zool
Message:

oh, there are more updates here than i recalled :/

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • bbox/bbox/__init__.py

    Revision 231 Revision 249
    1# bbox - an RSS / RDF aggregator 1# bbox - an RSS / RDF aggregator 
    2# Jo Walsh - Dec 2004 - Mar 2005 2# Jo Walsh - Dec 2004 - Mar 2005 
    3 3 
    4# This code owes heavily to the approach and source in Edd Dumbill's  4# This code owes heavily to the approach and source in Edd Dumbill's  
    5# IBM Developerworks article on aggregating RSS with contexts: 5# IBM Developerworks article on aggregating RSS with contexts: 
    6# http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html 6# http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html 
    7 7 
    8# It uses Mark Pilgrim's feedparser, at http://feedparser.org/ 8# It uses Mark Pilgrim's feedparser, at http://feedparser.org/ 
    9# This software has 2000 tests. The code is included in this package. 9# This software has 2000 tests. The code is included in this package. 
    10 10 
    11# It uses the 'rdfobj', an object interface to the python interface 11# It uses the 'rdfobj', an object interface to the python interface 
    12# to the redland rdf toolkit. This is also included. 12# to the redland rdf toolkit. This is also included. 
    13# redland is at http://www.redland.opensource.ac.uk/ 13# redland is at http://www.redland.opensource.ac.uk/ 
    14  14  
    15import feedparser 15import feedparser 
    16import time, datetime 16import time, datetime 
    17import rdfobj 17import rdfobj 
    18import RDF 18import RDF 
    19from bbox.politehttp import polite_request 19from bbox.politehttp import polite_request 
    20import bbox.spatialStore 20import bbox.spatialStore 
    21import bbox.config 21import bbox.config 
    22import os 22import os 
    23from warnings import warn 23from warnings import warn 
    24 24 
    25class BBox: 25class BBox: 
    26 26 
    27    def __init__(self,spatial=None,verbose=None,always_visit=None,db=None): 27    def __init__(self,spatial=None,verbose=None,always_visit=None,db=None): 
    28     28     
    29        """We initialise a bbox by passing a database to it. If wishing to use an optional spatial index, set spatial = [name of index or database]. setting verbose to a true value turns on BBox's stream of consciousness.""" 29        """We initialise a bbox by passing a database to it. If wishing to use an optional spatial index, set spatial = [name of index or database]. setting verbose to a true value turns on BBox's stream of consciousness.""" 
    30        if spatial is not None: 30        if spatial is not None: 
    31            self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial) 31            self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial) 
    32        else: 32        else: 
    33            self.spatialStore = None 33            self.spatialStore = None 
    34 34 
    35        self._verbose = verbose 35        self._verbose = verbose 
    36        self._visit_true = always_visit 36        self._visit_true = always_visit 
    37         37         
    38        os.chdir(bbox.config.store) 38        os.chdir(bbox.config.store) 
    39        if db is None: 39        if db is None: 
    40            db = bbox.config.db 40            db = bbox.config.db 
    41        self.model = rdfobj.Model(db,db='hash') 41        self.model = rdfobj.Model(db,db='hash') 
    42        self.model.load(bbox.config.boot) 42        self.model.load(bbox.config.boot) 
    43        from rdfobj import fbox 43        from rdfobj import fbox 
    44        fbox = rdfobj.fbox 44        fbox = rdfobj.fbox 
    45        counter = self.model.fetch(fbox.Visit_Count) 45        counter = self.model.fetch(fbox.Visit_Count) 
    46        c = counter[fbox.count] 46        c = counter[fbox.count] 
    47        if c is None: c = 0 47        if c is None: c = 0 
    48        c = int(str(c))+1 48        c = int(str(c))+1 
    49        counter[fbox.count] = str(c) 49        counter[fbox.count] = str(c) 
    50         50         
    51        if counter is None: 51        if counter is None: 
    52            v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) 52            v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) 
    53            v[fbox.count] = 0 53            v[fbox.count] = 0 
    54             54             
    55 55 
    56    def mention(self,thought): 56    def mention(self,thought): 
    57        """If BBox is constructed with verbose=1, prints to STDOUT (currently) a record of what it's up to.""" 57        """If BBox is constructed with verbose=1, prints to STDOUT (currently) a record of what it's up to.""" 
    58        if self._verbose: 58        if self._verbose: 
    59            print(thought) 59            print(thought) 
    60             60             
    61    def read_subscriptions(self): 61    def read_subscriptions(self): 
    62        """read_subscriptions() picks up the latest RSS feed updates. """ 62        """read_subscriptions() picks up the latest RSS feed updates. """ 
    63        self.mention("checking subscriptions.")  63        self.mention("checking subscriptions.")  
    64        subs = self.subscriptions() 64        subs = self.subscriptions() 
    65        from rdfobj import fbox 65        from rdfobj import fbox 
    66        fbox = rdfobj.fbox 66        fbox = rdfobj.fbox 
    67        for s in subs: 67        for s in subs: 
    68            self.mention("reading "+str(s[fbox.channel])) 68            self.mention("reading "+str(s[fbox.channel])) 
    69            format = s[fbox.format].uri() 69            format = s[fbox.format].uri() 
    70            c = s[fbox.channel] 70            c = s[fbox.channel] 
    71 71 
    72            # see if we're actually due a visit 72            # see if we're actually due a visit 
    73            due = self.visit_scheduled(s) 73            due = self.visit_scheduled(s) 
    74            if due is None:  74            if due is None:  
    75                print "nothing due to look at!" 75                print "nothing due to look at!" 
    76                subs.next() 76                subs.next() 
    77            else: 77            else: 
    78                if format == fbox.rss: 78                if format == fbox.rss: 
    79                    self.read_rss(c.uri(),subscription=s) 79                    self.read_rss(c.uri(),subscription=s) 
    80                elif format == fbox.rdf: 80                elif format == fbox.rdf: 
    81                    self.read_rdf(s[fbox.channel].uri(),subscription=s) 81                    self.read_rdf(s[fbox.channel].uri(),subscription=s) 
    82             82             
    83    def read_rss(self,uri,context=None,subscription=None): 83    def read_rss(self,uri,context=None,subscription=None,xml=None): 
    84        """Read updates from an RSS feed.""" 84        """Read updates from an RSS feed.""" 
    85 85 
    86        #if subscription is None: subscription = {} 86        #if subscription is None: subscription = {} 
    87        rss = rdfobj.rss 87        rss = rdfobj.rss 
    88        dc = rdfobj.dc 88        dc = rdfobj.dc 
    89        ical = rdfobj.ical 89        ical = rdfobj.ical 
    90        fbox = rdfobj.fbox 90        fbox = rdfobj.fbox 
    91        geo = rdfobj.geo 91        geo = rdfobj.geo 
      92         
    92        result = self.politely_get_uri(uri,subscription=subscription)  93        result = self.politely_get_uri(uri,subscription=subscription)  
    93 94 
    94        channel = self.model.fetch(uri)          95        channel = self.model.fetch(uri)          
    95        """If we got a feed object back from the request, then create a 96        """If we got a feed object back from the request, then create a 
    96        context for this visit to the feed, and store the entries that we 97        context for this visit to the feed, and store the entries that we 
    97        collected from it.""" 98        collected from it.""" 
    98        if self._visit_true: 99        if self._visit_true: 
    99            pass  100            pass  
    100        elif result['status'] != 200:  101        elif result['status'] != 200:  
    101            return 102            return 
    102        warn("PARSING")   
    103        feed = feedparser.parse(result['data']) 103        feed = feedparser.parse(result['data']) 
    104        for k in feed.keys():    
    105            print k   
    106            print feed[k]   
    107        if feed.has_key('feed'): 104        if feed.has_key('feed'): 
    108            context = self.visit(uri) 105            context = self.visit(uri) 
    109            # existence of exact duplicates? 106            # existence of exact duplicates? 
    110             107             
    111            for e in feed.entries: 108            for e in feed.entries: 
    112                for k in e.keys(): print k   
    113                link = str(e.link) 109                link = str(e.link) 
    114                title = None 110                title = None 
    115                item = self.model.create( rss.item, uri=link, context = context ) 111                item = self.model.create( rss.item, uri=link, context = context ) 
    116                 112                 
    117                if e.has_key('summary'): item[rss.description] = str(e.summary) 113                if e.has_key('summary'): item[rss.description] = str(e.summary) 
    118 114 
    119                if e.has_key('content'): item[rss.description] = str(e.content) 115                if e.has_key('content'): item[rss.description] = str(e.content) 
    120 116 
    121                if e.has_key('title'): 117                if e.has_key('title'): 
    122                    item[rss.title] = str(e.title) 118                    item[rss.title] = str(e.title) 
    123                    title = str(e.title) 119                    title = str(e.title) 
    124                     120                     
    125                item[fbox.channel] = channel 121                item[fbox.channel] = channel 
    126                 122                 
    127                # d.entries[0].modified_parsed is common 123                # d.entries[0].modified_parsed is common 
    128                 124                 
    129                time_tuple = None 125                time_tuple = None 
    130                if e.has_key('modified_parsed'): 126                if e.has_key('modified_parsed'): 
    131                    time_tuple = e.modified_parsed 127                    time_tuple = e.modified_parsed 
    132                elif e.has_key('created_parsed'): 128                elif e.has_key('created_parsed'): 
    133                    time_tuple = e.created_parsed 129                    time_tuple = e.created_parsed 
    134                 130                 
    135                # item[ical.datetime] = some process with time_tuple and strftime 131                # item[ical.datetime] = some process with time_tuple and strftime 
    136                # d = datetime.datetime(time_tuple) 132                # d = datetime.datetime(time_tuple) 
    137                # ical_date = ical_datetime.datetime_to_string(d) 133                # ical_date = ical_datetime.datetime_to_string(d) 
    138                # print ical_date 134                # print ical_date 
    139                # item[ical.datetime] = ical_date 135                # item[ical.datetime] = ical_date 
    140                # not much use without a timestamp 136                # not much use without a timestamp 
    141         137         
    142                if time_tuple is None: 138                if time_tuple is None: 
    143                    continue 139                    continue 
    144                ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) 140                ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) 
    145                item[ical.datetime] = ical_enough 141                item[ical.datetime] = ical_enough 
    146 142 
    147                rdf_type = None 143                rdf_type = None 
    148                if e.has_key('rdf_type'): 144                if e.has_key('rdf_type'): 
    149                    rdf_type = str(e['rdf_type']) 145                    rdf_type = str(e['rdf_type']) 
    150                    item[rdf.type] = rdf_type 146                    item[rdf.type] = rdf_type 
    151 147 
    152                if e.has_key('geo_lat'): print "LATT!!! "+str(e['geo_lat']) 148                if e.has_key('geo_lat'): print "LATT!!! "+str(e['geo_lat']) 
    153                if e.has_key('geo_lat') and e.has_key('geo_long'): 149                if e.has_key('geo_lat') and e.has_key('geo_long'): 
    154                    lat = str(e['geo_lat']) 150                    lat = str(e['geo_lat']) 
    155                    long = str(e['geo_long']) 151                    long = str(e['geo_long']) 
    156                    item[geo.lat] = lat 152                    item[geo.lat] = lat 
    157                    item[geo.long] = long 153                    item[geo.long] = long 
    158                     154                     
    159                    """Update the spatial index, if we have one.""" 155                    """Update the spatial index, if we have one.""" 
    160                    if self.spatialStore is not None: 156                    if self.spatialStore is not None: 
    161                        self.spatialStore.add_or_update_geom(rdf_type=rdf_type,name=title,x=long,y=lat,uri=link) 157                        self.spatialStore.add_or_update_geom(rdf_type=rdf_type,name=title,x=long,y=lat,uri=link) 
    162                             158                             
    163    def read_rdf(self,uri,subscription=None):  159    def read_rdf(self,uri,subscription=None,xml=None):         
    164        """Read updates from an RDF url.""" 160        """Read updates from an RDF url.""" 
    165        from rdfobj import geo, dc, rdf 161        from rdfobj import geo, dc, rdf 
    166        geo = rdfobj.geo 162        geo = rdfobj.geo 
    167        dc = rdfobj.dc 163        dc = rdfobj.dc 
    168        rdf = rdfobj.rdf 164        rdf = rdfobj.rdf 
    169         165         
    170        result = self.politely_get_uri(uri,subscription=subscription) 166        result = self.politely_get_uri(uri,subscription=subscription) 
    171        if self._visit_true: 167        if self._visit_true: 
    172            pass 168            pass 
    173        elif result['status'] != 200: 169        elif result['status'] != 200: 
    174            return 170            return 
    175         171         
    176        context = self.visit(uri) 172        context = self.visit(uri) 
    177         173         
    178        # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... 174        # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... 
    179        lats = {} 175        lats = {} 
    180        longs = {} 176        longs = {} 
    181        titles = {} 177        titles = {} 
    182        types = {} 178        types = {} 
    183        warn("parsing")    
    184        parser = RDF.Parser('raptor') 179        parser = RDF.Parser('raptor') 
    185        stream = parser.parse_as_stream(RDF.Uri(uri)) 180        stream = parser.parse_as_stream(RDF.Uri(uri)) 
    186         181         
    187        if stream: 182        if stream: 
    188            while not stream.end(): 183            while not stream.end(): 
    189                statement = stream.current() 184                statement = stream.current() 
    190                #warn(statement)   
    191                self.model.model.add_statement(statement,context) 185                self.model.model.add_statement(statement,context) 
    192                   
    193                # pls don't blame me, i just want to get something working fast 186                # pls don't blame me, i just want to get something working fast 
    194                if self.spatialStore is not None: 187                if self.spatialStore is not None: 
    195                    if statement.predicate == RDF.uri(str(geo.lat)): 188                    if statement.predicate == RDF.Node(uri_string=str(geo.lat)): 
    196                        lats[str(statement.subject)] = str(statement.object) 189                        lats[str(statement.subject)] = str(statement.object) 
    197                         190                         
    198                    elif statement.predicate == RDF.Uri(str(geo.long)): 191                    elif statement.predicate == RDF.Node(uri_string=str(geo.long)): 
    199                        longs[str(statement.subject)] = str(statement.object) 192                        longs[str(statement.subject)] = str(statement.object) 
    200         193         
    201                    elif statement.predicate == RDF.Uri(str(dc.title)): 194                    elif statement.predicate == RDF.Node(uri_string=str(dc.title)): 
    202                        titles[str(statement.subject)] = str(statement.object) 195                        titles[str(statement.subject)] = str(statement.object) 
    203                         196                         
    204                    elif statement.predicate == RDF.Uri(str(rdf.type)): 197                    elif statement.predicate == RDF.Node(uri_string=str(rdf.type)): 
    205                        types[str(statement.subject)] = str(statement.object) 198                        types[str(statement.subject)] = str(statement.object) 
    206            stream.next() 199                        
    207 200                stream.next() 
    208        warn("store") 201 
    209        if self.spatialStore is not None: 202        if self.spatialStore is not None: 
    210            for k in lats.keys(): 203            for k in lats.keys(): 
    211                lat = lats[k] 204                lat = lats[k] 
    212                long = longs[k] 205                long = longs[k] 
    213                title = None 206                title = None 
    214                type = None 207                type = None 
    215                if titles.has_key(k): 208                if titles.has_key(k): 
    216                    title = titles[k] 209                    title = titles[k] 
    217                if types.has_key(k): 210                if types.has_key(k): 
    218                    type = types[k] 211                    type = types[k] 
    219                self.spatialStore.add_or_update_geom(rdf_type=type,name=title,x=long,y=lat)  212                 warn("updating "+k) 
       213                 self.spatialStore.add_or_update_geom(uri=k,rdf_type=type,name=title,x=long,y=lat) 
    220         214         
    221    def politely_get_uri(self,uri,subscription=None): 215    def politely_get_uri(self,uri,subscription=None): 
    222        """Request a copy of the document at a url, first checking that it has changed since what we record as last-modified and the last etag that we have for it.""" 216        """Request a copy of the document at a url, first checking that it has changed since what we record as last-modified and the last etag that we have for it.""" 
    223         217         
    224        # we should deal with etag/last-mod politely here too @@TODO 218        # we should deal with etag/last-mod politely here too @@TODO 
    225        #visit = self.visit(uri) 219        #visit = self.visit(uri) 
    226        result = None 220        result = None 
    227        fbox = rdfobj.fbox 221        fbox = rdfobj.fbox 
    228 222 
    229        if subscription is None: 223        if subscription is None: 
    230            # we might just be using the parser without the context management 224            # we might just be using the parser without the context management 
    231            result = polite_request(str(uri)) 225            result = polite_request(str(uri)) 
    232            subscription = {} 226            subscription = {} 
    233             227             
    234        elif self._visit_true is not None: 228        elif self._visit_true is not None: 
    235            # we might always want to read the feed content (for debugging reasons) 229            # we might always want to read the feed content (for debugging reasons) 
    236            result = polite_request(str(uri))       230            result = polite_request(str(uri))       
    237             231             
    238        elif subscription[fbox.last_etag] is not None: 232        elif subscription[fbox.last_etag] is not None: 
    239            result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) 233            result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) 
    240        elif subscription[fbox.last_modified] is not None: 234        elif subscription[fbox.last_modified] is not None: 
    241            result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) 235            result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) 
    242        else: result = polite_request(str(uri)) 236        else: result = polite_request(str(uri)) 
    243        if result is None: 237        if result is None: 
    244            result = {'status':404} 238            result = {'status':404} 
    245            return result 239            return result 
    246 240 
    247        if result.has_key('status'): 241        if result.has_key('status'): 
    248            # this was a HTTP request 242            # this was a HTTP request 
    249            self.mention("received response: "+str(result['status'])) 243            self.mention("received response: "+str(result['status'])) 
    250            244            
    251            """Take actions about other kinds of HTTP statuses.(TODO)""" 245            """Take actions about other kinds of HTTP statuses.(TODO)""" 
    252            # handling different HTTP statuses. 246            # handling different HTTP statuses. 
    253         247         
    254            subscription[fbox.http_status] = str(result['status']) 248            subscription[fbox.http_status] = str(result['status']) 
    255            subscription[fbox.last_etag] = result['etag'] 249            subscription[fbox.last_etag] = result['etag'] 
    256            subscription[fbox.last_modified] = result['lastmodified']    250            subscription[fbox.last_modified] = result['lastmodified']    
    257            subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ")  251            subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ")  
    258         252         
    259        # a 'file:/' uri will only have result['data'] 253        # a 'file:/' uri will only have result['data'] 
    260        elif result['data'] is not None: 254        elif result['data'] is not None: 
    261            # pretend we have a positive HTTP status 255            # pretend we have a positive HTTP status 
    262            result['status'] = 200 256            result['status'] = 200 
    263        warn(str(result['data']))   
    264 257 
    265        return result 258        return result 
    266 259 
    267    def subscriptions(self): 260    def subscriptions(self): 
    268        """Returns a list (Iterator type) of the URLs at which 261        """Returns a list (Iterator type) of the URLs at which 
    269        there is a feed that we are subscribed to (fbox:Feed type)""" 262        there is a feed that we are subscribed to (fbox:Feed type)""" 
    270        from rdfobj import fbox, rdf 263        from rdfobj import fbox, rdf 
    271        fbox = rdfobj.fbox 264        fbox = rdfobj.fbox 
    272        rdf = rdfobj.rdf 265        rdf = rdfobj.rdf 
    273        subs = self.model.search(rdf.type,fbox.Feed) 266        subs = self.model.search(rdf.type,fbox.Feed) 
    274        return subs 267        return subs 
    275 268 
    276    def subscription(self,uri): 269    def subscription(self,uri): 
    277        """Given a uri, returns the rdfobj which is the subscription it represents.""" 270        """Given a uri, returns the rdfobj which is the subscription it represents.""" 
    278        obj = self.model.fetch(uri) 271        obj = self.model.fetch(uri) 
    279        return obj 272        return obj 
    280 273 
    281    def items(self,uri,since=None,until=None): 274    def items(self,uri,since=None,until=None): 
    282        """Get items from a feed, optionally filtering by date. (not completely implemented)""" 275        """Get items from a feed, optionally filtering by date. (not completely implemented)""" 
    283        from rdfobj import fbox, dc, rss 276        from rdfobj import fbox, dc, rss 
    284        rss = fbox.rss 277        rss = fbox.rss 
    285        dc = fbox.dc 278        dc = fbox.dc 
    286        fbox = rdfobj.fbox 279        fbox = rdfobj.fbox 
    287        s = self.subscription(uri) 280        s = self.subscription(uri) 
    288        c = s[fbox.channel] 281        c = s[fbox.channel] 
    289        out = []  282        out = []  
    290        if since is not None: 283        if since is not None: 
    291            for i in c[rss.items]: 284            for i in c[rss.items]: 
    292                warn(i[dc.date]) 285                warn(i[dc.date]) 
    293                if i[dc.date] > since: 286                if i[dc.date] > since: 
    294                    out.append(i) 287                    out.append(i) 
    295 288 
    296        return c.rss_items 289        return c.rss_items 
    297   290   
    298    def subscribe(self,feed=None,format=None,interval=None): 291    def subscribe(self,feed=None,format=None,interval=None): 
    299        """subscribe() creates a subscription to a uri. format is either 'rss' or 'rdf'. RDF is assumed if none is specified. Interval is the maximum interval in minutes that a feed should be checked at. It sends polite HTTP requests so don't worry about setting it to a bit more often than you might need. A value in minutes - defaults to 100 minutes.""" 292        """subscribe() creates a subscription to a uri. format is either 'rss' or 'rdf'. RDF is assumed if none is specified. Interval is the maximum interval in minutes that a feed should be checked at. It sends polite HTTP requests so don't worry about setting it to a bit more often than you might need. A value in minutes - defaults to 100 minutes.""" 
    300        from rdfobj import fbox 293        from rdfobj import fbox 
    301        fbox = rdfobj.fbox       294        fbox = rdfobj.fbox       
    302        if feed is None: return 295        if feed is None: return 
    303 296 
    304        f = self.model.search(fbox.channel,feed) 297        f = self.model.search(fbox.channel,feed) 
    305        found = None 298        found = None 
    306        for n in f: 299        for n in f: 
    307            found = 1     300            found = 1     
    308        if found is not None: 301        if found is not None: 
    309            return 302            return 
    310 303 
    311        self.mention("subscribing to "+str(feed)) 304        self.mention("subscribing to "+str(feed)) 
    312 305 
    313        if format is None:  306        if format is None:  
    314            format = fbox.rdf 307            format = fbox.rdf 
    315        elif format == 'rss': 308        elif format == 'rss': 
    316            format = fbox.rss 309            format = fbox.rss 
    317        elif format == 'rdf': 310        elif format == 'rdf': 
    318            format = fbox.rdf 311            format = fbox.rdf 
    319 312 
    320        if interval is None: interval = str(100) 313        if interval is None: interval = str(100) 
    321 314 
    322        ff = self.model.create( fbox.Feed, uri=None ) 315        ff = self.model.create( fbox.Feed, uri=None ) 
    323        ff[fbox.channel] = str(feed) 316        ff[fbox.channel] = str(feed) 
    324        ff[fbox.format] = str(format) 317        ff[fbox.format] = str(format) 
    325        ff[fbox.interval] = interval 318        ff[fbox.interval] = interval 
    326 319 
    327        return ff 320        return ff 
    328 321 
    329    def update(self): 322    def update(self): 
    330        """Causes all the subscribed URLs to be visited for updates.""" 323        """Causes all the subscribed URLs to be visited for updates.""" 
    331        subs = self.subscriptions() 324        subs = self.subscriptions() 
    332        from rdfobj import fbox 325        from rdfobj import fbox 
    333        fbox = rdfobj.fbox 326        fbox = rdfobj.fbox 
    334        while not subs.end(): 327        while not subs.end(): 
    335            s = subs.current() 328            s = subs.current() 
    336            self.visit(s[fbox.channel]) 329            self.visit(s[fbox.channel]) 
    337            subs.next() 330            subs.next() 
    338 331 
    339    def visit(self,uri=None): 332    def visit(self,uri=None): 
    340        """Creates an anonymous object which records a visit that we 333        """Creates an anonymous object which records a visit that we 
    341        paid to a feed, including a counter of times visited. This object is 334        paid to a feed, including a counter of times visited. This object is 
    342        used as a Redland context for all the information collected from a feed 335        used as a Redland context for all the information collected from a feed 
    343        during this visit.""" 336        during this visit.""" 
    344        # redland had problems serialising models with bnode context uris  337        # redland had problems serialising models with bnode context uris  
    345        count = self.counter() 338        count = self.counter() 
    346        from rdfobj import fbox 339        from rdfobj import fbox 
    347        fbox = rdfobj.fbox 340        fbox = rdfobj.fbox 
    348        visit_uri = str(fbox.visit)+'/'+str(count) 341        visit_uri = str(fbox.visit)+'/'+str(count) 
    349        visit = self.model.create( fbox.Visit , visit_uri) 342        visit = self.model.create( fbox.Visit , visit_uri) 
    350 343 
    351        visit[fbox.source] = uri 344        visit[fbox.source] = uri 
    352        t = time.strftime("%Y%m%dT%H%M%SZ") 345        t = time.strftime("%Y%m%dT%H%M%SZ") 
    353        visit[fbox.timestamp] = t 346        visit[fbox.timestamp] = t 
    354        return RDF.Node(RDF.Uri(str(visit.uri()))) 347        return RDF.Node(RDF.Uri(str(visit.uri()))) 
    355 348 
    def user(self,token=None,nick=None,mbox=None):
        """Look up a user / foaf:Person by login token, mailbox or name.

        The criteria are tried in the order token, mbox, nick:

        token -- search on foaf.auth_token; the foaf.alias of the first
                 hit is returned.
        mbox  -- search on foaf.mbox; the first hit is returned.
        nick  -- search on foaf.name and then foaf.givenName; a list
                 (possibly empty) of every hit is returned.

        A token or mbox with no hits falls through to the next criterion.
        None is returned when no criterion applies.  No security checks
        are made here - handle them yourself elsewhere!
        """
        from rdfobj import foaf
        foaf = rdfobj.foaf

        if token is not None:
            # Only the first matching record matters.
            for account in self.model.search(foaf.auth_token,token):
                return account[foaf.alias]

        if mbox is not None:
            for person in self.model.search(foaf.mbox,mbox):
                return person

        if nick is None:
            return None

        # Name lookups may match several people, so collect them all.
        matches = []
        matches.extend(self.model.search(foaf.name,nick))
        matches.extend(self.model.search(foaf.givenName,nick))
        return matches
    376 369 
    def add_user(self,nick=None,mbox=None,password=None,uri=None,page=None):
        """Create a new user foaf:Person together with its shadow auth record.

        nick     -- stored as foaf.nick on both the Person and the shadow
                    record.
        mbox     -- stored as foaf.mbox on the Person.
        password -- stored, exactly as given, on the shadow record only.
        uri      -- optional URI for the Person; when omitted, create() is
                    called without one.
        page     -- optional homepage, stored as foaf.homepage.

        The Person is also kept on self.obj.  Returns the newly generated
        auth token.
        """
        store = self.store
        # wlan is not used below; the import is kept in case loading it has
        # side effects in rdfobj -- TODO confirm and drop if not.
        from rdfobj import foaf, wlan
        foaf = rdfobj.foaf

        # 'uri' and 'page' used to be read here without ever being defined,
        # so every call raised NameError; they are optional arguments now.
        if uri is None:
            obj = self.model.create(foaf.Person)
        else:
            obj = self.model.create(foaf.Person,uri=uri)
        obj[foaf.mbox] = mbox
        obj[foaf.nick] = nick
        if page is not None: obj[foaf.homepage] = page
        self.obj = obj

        # Create a kind of shadow user where we store the password and the
        # logged-in token, so they won't get serialised accidentally along
        # with the user.
        # NOTE(review): the password is stored as given (not hashed) --
        # changing that needs a matching change wherever it is checked.
        auth = store.create(foaf.AuthedPerson)
        auth[foaf.password] = password
        auth[foaf.nick] = nick
        auth[foaf.alias] = obj

        token = self.auth_token()
        auth[foaf.auth_token] = token
        self.model.sync()
        return token
    401 394 
    def auth_token(self):
        """Generate a random auth token of six upper-case ASCII letters.

        The token is drawn from the operating system's randomness source
        (random.SystemRandom) because it is used as a login credential.
        """
        # Imported locally: this module does not import them at the top.
        import random
        import string

        # The previous randint(0, 26) is inclusive at both ends, so it could
        # produce chr(91), i.e. '[', instead of a letter.
        rng = random.SystemRandom()
        return ''.join([rng.choice(string.ascii_uppercase) for n in range(6)])
    408         401         
    409    def visit_scheduled(self,sub): 402    def visit_scheduled(self,sub): 
    410        """Compare the last visited time, if that's applicable, to the interval between events (rather than a schedule? perhaps we'll have to re-think this later."""    403        """Compare the last visited time, if that's applicable, to the interval between events (rather than a schedule? perhaps we'll have to re-think this later."""    
    411        if self._visit_true is not None: 404        if self._visit_true is not None: 
    412            return 1 405            return 1 
    413        from rdfobj import fbox 406        from rdfobj import fbox 
    414        fbox = rdfobj.fbox 407        fbox = rdfobj.fbox 
    415        last = sub[fbox.last_visited] 408        last = sub[fbox.last_visited] 
    416        if last is None: 409        if last is None: 
    417            return 1 410            return 1 
    418        t = time.time() 411        t = time.time() 
    419        # convert last time simply from ical to epoch? 412        # convert last time simply from ical to epoch? 
    420 413 
    421        return 1 414        return 1 
    422        since = t - float(str(last)) 415        since = t - float(str(last)) 
    423         416         
    424        interval = sub[fbox.interval] 417        interval = sub[fbox.interval] 
    425        if interval is None: 418        if interval is None: 
    426            sub[fbox.interval] = str(100) 419            sub[fbox.interval] = str(100) 
    427            interval = sub[fbox.interval] 420            interval = sub[fbox.interval] 
    428        secs = int(str(interval))*60 421        secs = int(str(interval))*60 
    429        if since >= secs: 422        if since >= secs: 
    430            return 1 423            return 1 
    431        return None 424        return None 
    432                 425                 
    def counter(self):
        """Update the counter that's used to generate visit context URIs.

        Fetches the fbox.Visit_Count object from the model, adds one to its
        fbox.count property, stores the new value back as a string and
        returns it as an int.
        """
        from rdfobj import fbox
        fbox = rdfobj.fbox
        counter = self.model.fetch(fbox.Visit_Count)
        current = counter[fbox.count]
        if current is None:
            # Other code in this module treats an absent property as None;
            # start counting from zero rather than failing on int('None').
            c = 1
        else:
            c = int(str(current))+1
        counter[fbox.count] = str(c)
        return c
    442 435 
    443 436 
    444if __name__ == "__main__": 437if __name__ == "__main__": 
    445    bbox = BBox(visit_true = 1) 438    bbox = BBox(visit_true = 1) 
    446    bbox.subscribe(feed='http://frot.org/wirelesslondon/bbox.rdf',format=fbox.rss) 439    bbox.subscribe(feed='http://frot.org/wirelesslondon/bbox.rdf',format=fbox.rss) 
    447    bbox.subscribe(feed='http://frot.org/devlog/index.rss',format=fbox.rss) 440    bbox.subscribe(feed='http://frot.org/devlog/index.rss',format=fbox.rss) 
    448    bbox.subscribe(feed='http://zooleika.org.uk/bio/foaf.rdf',format=fbox.rdf) 441    bbox.subscribe(feed='http://zooleika.org.uk/bio/foaf.rdf',format=fbox.rdf) 
    449    bbox.read_subscriptions() 442    bbox.read_subscriptions() 
  • bbox/store/boot.rdf

    Revision 214 Revision 249
    1<?xml version="1.0" encoding="utf-8"?> 1 
    2<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 2<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
    3        xmlns:fbox="http://frot.org/2005/bbox/"> 3        xmlns:fbox="http://frot.org/2005/bbox/"> 
    4  <rdf:Description rdf:about="http://xmlns.com/foaf/0.1/"> 4  <rdf:Description rdf:about="http://xmlns.com/foaf/0.1/"> 
    5    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">foaf</ns0:qname> 5    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">foaf</ns0:qname> 
    6  </rdf:Description> 6  </rdf:Description> 
    7 7 
    8  <rdf:Description rdf:about="http://xmlns.com/wordnet/1.6/"> 8  <rdf:Description rdf:about="http://xmlns.com/wordnet/1.6/"> 
    9    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">wn</ns0:qname> 9    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">wn</ns0:qname> 
    10  </rdf:Description> 10  </rdf:Description> 
    11 11 
    12  <rdf:Description rdf:about="http://www.w3.org/2002/07/owl#"> 12  <rdf:Description rdf:about="http://www.w3.org/2002/07/owl#"> 
    13    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">owl</ns0:qname> 13    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">owl</ns0:qname> 
    14  </rdf:Description> 14  </rdf:Description> 
    15 15 
    16  <rdf:Description rdf:about="http://purl.org/dc/elements/1.1/"> 16  <rdf:Description rdf:about="http://purl.org/dc/elements/1.1/"> 
    17    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">dc</ns0:qname> 17    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">dc</ns0:qname> 
    18  </rdf:Description> 18  </rdf:Description> 
    19 19 
    20  <rdf:Description rdf:about="http://www.w3.org/2002/12/cal/ical#"> 20  <rdf:Description rdf:about="http://www.w3.org/2002/12/cal/ical#"> 
    21    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">ical</ns0:qname> 21    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">ical</ns0:qname> 
    22  </rdf:Description> 22  </rdf:Description> 
    23 23 
    24  <rdf:Description rdf:about="http://www.w3.org/2000/01/rdf-schema#"> 24  <rdf:Description rdf:about="http://www.w3.org/2000/01/rdf-schema#"> 
    25    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rdfs</ns0:qname> 25    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rdfs</ns0:qname> 
    26  </rdf:Description> 26  </rdf:Description> 
    27 27 
    28  <rdf:Description rdf:about="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> 28  <rdf:Description rdf:about="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> 
    29    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rdf</ns0:qname> 29    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rdf</ns0:qname> 
    30  </rdf:Description> 30  </rdf:Description> 
    31 31 
    32  <rdf:Description rdf:about="http://www.w3.org/2000/xmlns/"> 32  <rdf:Description rdf:about="http://www.w3.org/2000/xmlns/"> 
    33    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">xmlns</ns0:qname> 33    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">xmlns</ns0:qname> 
    34  </rdf:Description> 34  </rdf:Description> 
    35 35 
    36  <rdf:Description rdf:about="http://www.w3.org/2003/01/geo/wgs84_pos#"> 36  <rdf:Description rdf:about="http://www.w3.org/2003/01/geo/wgs84_pos#"> 
    37    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">geo</ns0:qname> 37    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">geo</ns0:qname> 
    38  </rdf:Description> 38  </rdf:Description> 
    39 39 
    40  <rdf:Description rdf:about="http://frot.org/2005/bbox/"> 40  <rdf:Description rdf:about="http://frot.org/2005/bbox/"> 
    41    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">fbox</ns0:qname> 41    <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">fbox</ns0:qname> 
    42  </rdf:Description> 42  </rdf:Description> 
    43 43 
    44  <rdf:Description rdf:about="http://purl.org/rss/1.0/"> 44  <rdf:Description rdf:about="http://purl.org/rss/1.0/"> 
    45     <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rss</ns0:qname>        45     <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">rss</ns0:qname>        
    46  </rdf:Description>    46  </rdf:Description>    
    47           47           
      48   <rdf:Description rdf:about="http://xmlns.com/2003/wireless/"> 
      49        <ns0:qname xmlns:ns0="http://www.w3.org/2000/xmlns/">wlan</ns0:qname> 
      50          </rdf:Description> 
      51 
    48 52 
    49</rdf:RDF> 53</rdf:RDF> 
  • bbox/t/01_parse.t

    Revision 226 Revision 249
    1import unittest 1import unittest 
    2import feedparser 2import feedparser 
    3from bbox import BBox 3from bbox import BBox 
    4 4 
    5class FeedTestCase(unittest.TestCase): 5class FeedTestCase(unittest.TestCase): 
    6    def __init__(self,*args): 6    def __init__(self,*args): 
    7        unittest.TestCase.__init__(self, *args) 7        unittest.TestCase.__init__(self, *args) 
    8 8 
    9    def test_01_parse_rss(self): 9    def test_01_parse_rss(self): 
    10        fh = open('t/geo.rss') 10        fh = open('t/geo.rss') 
    11        xml = fh.read() 11        xml = fh.read() 
    12        feed = feedparser.parse(xml) 12        feed = feedparser.parse(xml) 
    13        for e in feed.entries: 13        for e in feed.entries: 
    14            for k in e.keys(): 14            for k in e.keys(): 
    15                print e[k] 15                print e[k] 
    16 16 
    17    def test_02_bbox_parse_rss(self): 17    def test_02_bbox_parse_rss(self): 
    18        bbox = BBox() 18        bbox = BBox() 
    19        bbox.read_rss('file:t/geo.rss') 19        bbox.read_rss('file:t/geo.rss') 
    20 20 
    21    def test_03_bbox_parse_spatial_rss(self): 21    def test_03_bbox_parse_spatial_rss(self): 
    22        bbox = BBox(spatial='bbox') 22        bbox = BBox(spatial='bbox') 
    23        bbox.read_rss('http://localhost/geo.rss') 23        bbox.read_rss('http://localhost/geo.rss') 
    24 24 
    25    def test_03_bbox_parse_spatial_rdf(self): 25    def test_03_bbox_parse_spatial_rdf(self): 
    26        bbox = BBox(spatial='bbox') 26        bbox = BBox(spatial='bbox') 
    27        bbox.read_rdf('http://localhost/geo.rss') 27        bbox.read_rdf('http://localhost/JoWalsh.rdf') 
# Run every test in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
    30 30 
  • bbox/t/JoWalsh.rdf

    Revision 226 Revision 249
    1<?xml version="1.0" encoding="iso-8859-1" ?> 1<?xml version="1.0" encoding="iso-8859-1" ?> 
    2<!-- generator="Drupal FOAF.Module" --> 2<!-- generator="Drupal FOAF.Module" --> 
    3<rdf:RDF xmlns="http://xmlns.com/foaf/0.1" 3<rdf:RDF xmlns="http://xmlns.com/foaf/0.1"