Changeset 417

Show
Ignore:
Timestamp:
09/30/05 20:29:51 (3 years ago)
Author:
zool
Message:

Before, read_rdf made multiple copies of existing statements (one per visit context); now, by default, it does not add a new statement if that statement already exists in the model.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • bbox/bbox/__init__.py

    Revision 345 Revision 417
    1# bbox - an RSS / RDF aggregator 1# bbox - an RSS / RDF aggregator 
    2# Jo Walsh - Dec 2004 - Mar 2005 2# Walsh - Dec 2004 - Mar 2005 
    3 3 
    4# This code owes heavily to the approach and source in Edd Dumbill's  4# This code owes heavily to the approach and source in Edd Dumbill's  
    5# IBM Developerworks article on aggregating RSS with contexts: 5# IBM Developerworks article on aggregating RSS with contexts: 
    6# http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html 6# http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html 
    7 7 
    8# It uses Mark Pilgrim's feedparser, at http://feedparser.org/ 8# It uses Mark Pilgrim's feedparser, at http://feedparser.org/ 
    9# This software has 2000 tests. The code is included in this package. 9# This software has 2000 tests. The code is included in this package. 
    10 10 
    11# It uses the 'rdfobj', an object interface to the python interface 11# It uses the 'rdfobj', an object interface to the python interface 
    12# to the redland rdf toolkit. This is also included. 12# to the redland rdf toolkit. This is also included. 
    13# redland is at http://www.redland.opensource.ac.uk/ 13# redland is at http://www.redland.opensource.ac.uk/ 
    14  14  
    15import feedparser 15import feedparser 
    16import time, datetime 16import time, datetime 
    17import rdfobj 17import rdfobj 
    18import RDF 18import RDF 
    19from bbox.politehttp import polite_request 19from bbox.politehttp import polite_request 
    20import bbox.spatialStore 20import bbox.spatialStore 
    21import bbox.config 21import bbox.config 
    22import os 22import os 
    23from warnings import warn 23from warnings import warn 
    24 24 
    25class BBox: 25class BBox: 
    26 26 
    27    def __init__(self,spatial=None,verbose=None,always_visit=None,db=None,model=None,store=None): 27    def __init__(self,spatial=None,verbose=None,always_visit=None,db=None,model=None,store=None,textindex=None): 
    28     28     
    29        """We initialise a bbox by passing a database to it. If wishing to use an optional spatial index, set spatial = [name of index or database]. setting verbose to a true value turns on BBox's stream of consciousness.""" 29        """We initialise a bbox by passing a database to it. If wishing to use an optional spatial index, set spatial = [name of index or database]. setting verbose to a true value turns on BBox's stream of consciousness.""" 
    30        if spatial is not None: 30        if spatial is not None: 
    31            self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial) 31            self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial) 
    32        else: 32        else: 
    33            self.spatialStore = None 33            self.spatialStore = None 
      34        if textindex: 
      35            self.textindex = textindex 
    34 36 
    35        self._verbose = verbose 37        self._verbose = verbose 
    36        self._visit_true = always_visit 38        self._visit_true = always_visit 
    37         39         
    38        if db is None: 40        if db is None: 
    39            db = bbox.config.db 41            db = bbox.config.db 
    40        if store is None: 42        if store is None: 
    41            store = bbox.config.store 43            store = bbox.config.store 
    42 44 
    43        if model is not None: 45        if model is not None: 
    44            self.model = model 46            self.model = model 
    45        else:  47        else:  
    46            if store is not None:  48            if store is not None:  
    47                os.chdir(store)       49                os.chdir(store)       
    48            self.model = rdfobj.Model(db,db='hash') 50            self.model = rdfobj.Model(db,db='hash') 
    49            self.model.load(bbox.config.boot) 51            self.model.load(bbox.config.boot) 
    50 52 
    51        from rdfobj import fbox 53        from rdfobj import fbox 
    52        counter = self.model.fetch(fbox.Visit_Count) 54        counter = self.model.fetch(fbox.Visit_Count) 
    53        c = counter[fbox.count] 55        c = counter[fbox.count] 
    54        if c is None: c = 0 56        if c is None: c = 0 
    55        c = int(str(c))+1 57        c = int(str(c))+1 
    56        counter[fbox.count] = str(c) 58        counter[fbox.count] = str(c) 
    57         59         
    58        if counter is None: 60        if counter is None: 
    59            v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) 61            v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) 
    60            v[fbox.count] = 0 62            v[fbox.count] = 0 
    61             63             
    62 64 
    63    def mention(self,thought): 65    def mention(self,thought): 
    64        """If BBox is constructed with verbose=1, prints to STDOUT (currently) a record of what it's up to.""" 66        """If BBox is constructed with verbose=1, prints to STDOUT (currently) a record of what it's up to.""" 
    65        if self._verbose: 67        if self._verbose: 
    66            print(thought) 68            print(thought) 
    67             69             
    68    def read_subscriptions(self): 70    def read_subscriptions(self): 
    69        """read_subscriptions() picks up the latest RSS feed updates. """ 71        """read_subscriptions() picks up the latest RSS feed updates. """ 
    70        self.mention("checking subscriptions.")  72        self.mention("checking subscriptions.")  
    71        subs = self.subscriptions() 73        subs = self.subscriptions() 
    72        from rdfobj import fbox 74        from rdfobj import fbox 
    73        for s in subs: 75        for s in subs: 
    74            self.mention("reading "+str(s[fbox.channel])) 76            self.mention("reading "+str(s[fbox.channel])) 
    75            format = s[fbox.format].uri() 77            format = s[fbox.format].uri() 
    76            c = s[fbox.channel] 78            c = s[fbox.channel] 
    77 79 
    78            # see if we're actually due a visit 80            # see if we're actually due a visit 
    79            due = self.visit_scheduled(s) 81            due = self.visit_scheduled(s) 
    80            if due is None:  82            if due is None:  
    81                print "nothing due to look at!" 83                print "nothing due to look at!" 
    82                subs.next() 84                subs.next() 
    83            else: 85            else: 
    84                if format == fbox.rss: 86                if format == fbox.rss: 
    85                    self.read_rss(c.uri(),subscription=s) 87                    self.read_rss(c.uri(),subscription=s) 
    86                elif format == fbox.rdf: 88                elif format == fbox.rdf: 
    87                    self.read_rdf(s[fbox.channel].uri(),subscription=s) 89                    self.read_rdf(s[fbox.channel].uri(),subscription=s) 
    88             90             
    89    def read_rss(self,uri,context=None,subscription=None,xml=None): 91    def read_rss(self,uri,context=None,subscription=None,xml=None): 
    90        """Read updates from an RSS feed.""" 92        """Read updates from an RSS feed.""" 
    91        from rdfobj import rss, rdf, fbox, ical, dc, foaf 93        from rdfobj import rss, rdf, fbox, ical, dc, foaf 
    92        #if subscription is None: subscription = {} 94        #if subscription is None: subscription = {} 
    93           
    94        result = self.politely_get_uri(uri,subscription=subscription)  95        result = self.politely_get_uri(uri,subscription=subscription)  
    95 96 
    96        channel = self.model.fetch(uri)          97        channel = self.model.fetch(uri)          
    97        """If we got a feed object back from the request, then create a 98        """If we got a feed object back from the request, then create a 
    98        context for this visit to the feed, and store the entries that we 99        context for this visit to the feed, and store the entries that we 
    99        collected from it.""" 100        collected from it.""" 
    100        if self._visit_true: 101        if self._visit_true: 
    101            pass  102            pass  
    102        elif result['status'] != 200:  103        elif result['status'] != 200:  
    103            return 104            return [] 
    104        items = [] 105        items = [] 
    105        feed = feedparser.parse(result['data']) 106        feed = feedparser.parse(result['data']) 
    106        if feed.has_key('feed'): 107        if feed.has_key('feed'): 
    107            context = self.visit(uri) 108            context = self.visit(uri) 
    108            # existence of exact duplicates? 109            # existence of exact duplicates? 
    109             110             
    110            for e in feed.entries: 111            for e in feed.entries: 
    111                link = str(e.link) 112                link = str(e.link) 
    112                title = None 113                title = None 
    113                item = self.model.create( rss.item, uri=link, context = context ) 114                item = self.model.create( rss.item, uri=link, context = context ) 
    114                 115                 
    115                if e.has_key('summary'): item[rss.description] = str(e.summary) 116                if e.has_key('summary'): item[rss.description] = str(e.summary) 
    116 117 
    117                if e.has_key('content'): item[rss.description] = str(e.content) 118                if e.has_key('content'): item[rss.description] = str(e.content) 
    118 119 
    119                if e.has_key('title'): 120                if e.has_key('title'): 
    120                    item[rss.title] = str(e.title) 121                    item[rss.title] = str(e.title) 
    121                    title = str(e.title) 122                    title = str(e.title) 
    122                     123                     
    123                item[fbox.channel] = channel 124                item[fbox.channel] = channel 
    124                 125                 
    125                # d.entries[0].modified_parsed is common 126                # d.entries[0].modified_parsed is common 
    126                 127                 
    127                time_tuple = None 128                time_tuple = None 
    128                if e.has_key('modified_parsed'): 129                if e.has_key('modified_parsed'): 
    129                    time_tuple = e.modified_parsed 130                    time_tuple = e.modified_parsed 
    130                elif e.has_key('created_parsed'): 131                elif e.has_key('created_parsed'): 
    131                    time_tuple = e.created_parsed 132                    time_tuple = e.created_parsed 
    132                 133                 
    133                # item[ical.datetime] = some process with time_tuple and strftime 134                # item[ical.datetime] = some process with time_tuple and strftime 
    134                # d = datetime.datetime(time_tuple) 135                # d = datetime.datetime(time_tuple) 
    135                # ical_date = ical_datetime.datetime_to_string(d) 136                # ical_date = ical_datetime.datetime_to_string(d) 
    136                # print ical_date 137                # print ical_date 
    137                # item[ical.datetime] = ical_date 138                # item[ical.datetime] = ical_date 
    138                # not much use without a timestamp 139                # not much use without a timestamp 
    139         140         
    140                if time_tuple is None: 141                if time_tuple is None: 
    141                    continue 142                    continue 
    142                ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) 143                ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) 
    143                item[ical.datetime] = ical_enough 144                item[ical.datetime] = ical_enough 
    144 145 
    145                rdf_type = None 146                rdf_type = None 
    146                if e.has_key('rdf_type'): 147                if e.has_key('rdf_type'): 
    147                    rdf_type = str(e['rdf_type']) 148                    rdf_type = str(e['rdf_type']) 
    148                    item[rdf.type] = rdf_type 149                    item[rdf.type] = rdf_type 
    149 150 
    150                if e.has_key('geo_lat'): print "LATT!!! "+str(e['geo_lat']) 151                if e.has_key('geo_lat'): print "LATT!!! "+str(e['geo_lat']) 
    151                if e.has_key('geo_lat') and e.has_key('geo_long'): 152                if e.has_key('geo_lat') and e.has_key('geo_long'): 
    152                    lat = str(e['geo_lat']) 153                    lat = str(e['geo_lat']) 
    153                    long = str(e['geo_long']) 154                    long = str(e['geo_long']) 
    154                    item[geo.lat] = lat 155                    item[geo.lat] = lat 
    155                    item[geo.long] = long 156                    item[geo.long] = long 
    156                     157                     
    157                    """Update the spatial index, if we have one.""" 158                    """Update the spatial index, if we have one.""" 
    158                    if self.spatialStore is not None: 159                    if self.spatialStore is not None: 
    159                        self.spatialStore.add_or_update_geom(rdf_type=rdf_type,name=title,x=long,y=lat,uri=link) 160                        self.spatialStore.add_or_update_geom(rdf_type=rdf_type,name=title,x=long,y=lat,uri=link) 
    160                items.append(item) 161                items.append(item) 
    161        return items 162        return items 
    162                             163                             
    163    def read_rdf(self,uri,subscription=None,xml=None):   164    def read_rdf(self,uri,subscription=None,xml=None):   
    164        """Read updates from an RDF url.""" 165        """Read updates from an RDF url.""" 
    165        from rdfobj import geo, dc, rdf 166        from rdfobj import geo, dc, rdf 
    166         167         
    167        result = self.politely_get_uri(uri,subscription=subscription) 168        result = self.politely_get_uri(uri,subscription=subscription) 
    168        if self._visit_true: 169        if self._visit_true: 
    169            pass 170            pass 
    170        elif result['status'] != 200: 171        elif result['status'] != 200: 
    171            return 172            return 
    172         173         
    173        context = self.visit(uri) 174        context = self.visit(uri) 
    174         175         
    175        # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... 176        # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... 
    176        lats = {} 177        lats = {} 
    177        longs = {} 178        longs = {} 
    178        titles = {} 179        titles = {} 
    179        types = {} 180        types = {} 
    180        parser = RDF.Parser('raptor') 181        parser = RDF.Parser('raptor') 
    181        stream = parser.parse_as_stream(RDF.Uri(uri)) 182        stream = parser.parse_as_stream(RDF.Uri(uri)) 
    182        subjects = {}    183        subjects = {}    
    183        if stream: 184        if stream: 
    184            while not stream.end(): 185            while not stream.end(): 
    185                statement = stream.current() 186                statement = stream.current() 
    186                subjects[statement.subject] = 1 187                subjects[statement.subject] = 1 
    187                self.model.model.add_statement(statement,context)  188                 # check for statement existence 
    188                # pls don't blame me, i just want to get something working fast  189                 exists = None 
    189                if self.spatialStore is not None:  190                 existing = self.model.model.find_statements(statement) 
    190                    if statement.predicate == RDF.Node(uri_string=str(geo.lat)):  191                 for s in existing: 
    191                        lats[str(statement.subject)] = str(statement.object)  192                     exists = s 
       193                 if exists is None: 
       194                  
       195                     self.model.model.add_statement(statement,context) 
       196                         # pls don't blame me, i just want to get something working fast 
       197                     if self.spatialStore is not None: 
       198                         if statement.predicate == RDF.Node(uri_string=str(geo.lat)): 
       199                             lats[str(statement.subject)] = str(statement.object) 
    192                         200                         
    193                    elif statement.predicate == RDF.Node(uri_string=str(geo.long)): 201                       elif statement.predicate == RDF.Node(uri_string=str(geo.long)): 
    194                        longs[str(statement.subject)] = str(statement.object) 202                            longs[str(statement.subject)] = str(statement.object) 
    195         203         
    196                    elif statement.predicate == RDF.Node(uri_string=str(dc.title)): 204                       elif statement.predicate == RDF.Node(uri_string=str(dc.title)): 
    197                        titles[str(statement.subject)] = str(statement.object) 205                            titles[str(statement.subject)] = str(statement.object) 
    198                         206                         
    199                    elif statement.predicate == RDF.Node(uri_string=str(rdf.type)): 207                        elif statement.predicate == RDF.Node(uri_string=str(rdf.type)): 
    200                        types[str(statement.subject)] = str(statement.object) 208                            types[str(statement.subject)] = str(statement.object) 
    201                        209                          
    202                stream.next() 210                stream.next() 
    203        objects = [] 211        objects = [] 
    204        for s in subjects.keys(): 212        for s in subjects.keys(): 
    205            objects.append(self.model.fetch(s)) 213            objects.append(self.model.fetch(s)) 
    206             214             
    207        if self.spatialStore is not None: 215        if self.spatialStore is not None: 
    208            for k in lats.keys(): 216            for k in lats.keys(): 
    209                lat = lats[k] 217                lat = lats[k] 
    210                long = longs[k] 218                long = longs[k] 
    211                title = None 219                title = None 
    212                type = None 220                type = None 
    213                if titles.has_key(k): 221                if titles.has_key(k): 
    214                    title = titles[k] 222                    title = titles[k] 
    215                if types.has_key(k): 223                if types.has_key(k): 
    216                    type = types[k] 224                    type = types[k] 
    217                warn("updating "+k) 225                warn("updating "+k) 
    218                self.spatialStore.add_or_update_geom(uri=k,rdf_type=type,name=title,x=long,y=lat) 226                self.spatialStore.add_or_update_geom(uri=k,rdf_type=type,name=title,x=long,y=lat) 
      227        if self.textindex is not None: 
      228            warn("index") 
      229            for o in objects: 
      230                 
      231                schema = o.rdf_type 
      232                warn(str(o)) 
      233                warn(str(schema)) 
      234                if schema is not None: 
      235                    self.textindex.text_index(schema,o) 
      236         
    219        return objects 237        return objects 
    220         238         
    221    def politely_get_uri(self,uri,subscription=None): 239    def politely_get_uri(self,uri,subscription=None): 
    222        """Request a copy of the document at a url, first checking that it has changed since what we record as last-modified and the last etag that we have for it.""" 240        """Request a copy of the document at a url, first checking that it has changed since what we record as last-modified and the last etag that we have for it.""" 
    223         241         
    224        # we should deal with etag/last-mod politely here too @@TODO 242        # we should deal with etag/last-mod politely here too @@TODO 
    225        #visit = self.visit(uri) 243        #visit = self.visit(uri) 
    226        result = None 244        result = None 
    227        from rdfobj import fbox 245        from rdfobj import fbox 
    228        if subscription is None: 246        if subscription is None: 
    229            # we might just be using the parser without the context management 247            # we might just be using the parser without the context management 
    230            result = polite_request(str(uri)) 248            result = polite_request(str(uri)) 
    231            subscription = {'fake':1} 249            subscription = {'fake':1} 
    232             250             
    233        elif self._visit_true is not None: 251        elif self._visit_true is not None: 
    234            # we might always want to read the feed content (for debugging reasons) 252            # we might always want to read the feed content (for debugging reasons) 
    235            result = polite_request(str(uri))       253            result = polite_request(str(uri))       
    236             254             
    237        elif subscription[fbox.last_etag] is not None: 255        elif subscription[fbox.last_etag] is not None: 
    238            result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) 256            result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) 
    239        elif subscription[fbox.last_modified] is not None: 257        elif subscription[fbox.last_modified] is not None: 
    240            result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) 258            result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) 
    241        else: result = polite_request(str(uri)) 259        else: result = polite_request(str(uri)) 
    242        if result is None: 260        if result is None: 
    243            result = {'status':404} 261            result = {'status':404} 
    244            return result 262            return result 
    245 263 
    246        if result.has_key('status'): 264        if result.has_key('status'): 
    247            # this was a HTTP request 265            # this was a HTTP request 
    248            self.mention("received response: "+str(result['status'])) 266            self.mention("received response: "+str(result['status'])) 
    249            267            
    250            """Take actions about other kinds of HTTP statuses.(TODO)""" 268            """Take actions about other kinds of HTTP statuses.(TODO)""" 
    251            # handling different HTTP statuses. 269            # handling different HTTP statuses. 
    252             270             
    253            if subscription.has_key('fake'): 271            if subscription.has_key('fake'): 
    254                pass 272                pass 
    255            else: 273            else: 
    256                subscription[fbox.http_status] = str(result['status']) 274                subscription[fbox.http_status] = str(result['status']) 
    257                subscription[fbox.last_etag] = result['etag'] 275                subscription[fbox.last_etag] = result['etag'] 
    258                subscription[fbox.last_modified] = result['lastmodified']        276                subscription[fbox.last_modified] = result['lastmodified']        
    259                subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ")  277                subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ")  
    260         278         
    261        # a 'file:/' uri will only have result['data'] 279        # a 'file:/' uri will only have result['data'] 
    262        elif result['data'] is not None: 280        elif result['data'] is not None: 
    263            # pretend we have a positive HTTP status 281            # pretend we have a positive HTTP status 
    264            result['status'] = 200 282            result['status'] = 200 
    265 283 
    266        return result 284        return result 
    267 285 
    268    def subscriptions(self): 286    def subscriptions(self): 
    269        """Returns a list (Iterator type) of the URLs at which 287        """Returns a list (Iterator type) of the URLs at which 
    270        there is a feed that we are subscribed to (fbox:Feed type)""" 288        there is a feed that we are subscribed to (fbox:Feed type)""" 
    271        from rdfobj import fbox, rdf 289        from rdfobj import fbox, rdf 
    272        # workaround, as we want a list, not an Iterator which is sometimes empty 290        # workaround, as we want a list, not an Iterator which is sometimes empty 
    273        subs = self.model.search(rdf.type,fbox.Feed).list() 291        subs = self.model.search(rdf.type,fbox.Feed).list() 
    274        return subs 292        return subs 
    275 293 
    276    def subscription(self,uri): 294    def subscription(self,uri): 
    277        """Given a uri, returns the rdfobj which is the subscription it represents.""" 295        """Given a uri, returns the rdfobj which is the subscription it represents.""" 
    278        obj = self.model.fetch(uri) 296        obj = self.model.fetch(uri) 
    279        return obj 297        return obj 
    280 298 
    281    def items(self,uri,since=None,until=None): 299    def items(self,uri,since=None,until=None): 
    282        """Get items from a feed, optionally filtering by date. (not completely implemented)""" 300        """Get items from a feed, optionally filtering by date. (not completely implemented)""" 
    283        from rdfobj import fbox, dc, rss 301        from rdfobj import fbox, dc, rss 
    284        s = self.subscription(uri) 302        s = self.subscription(uri) 
    285        c = s[fbox.channel] 303        c = s[fbox.channel] 
    286        out = []  304        out = []  
    287        if since is not None: 305        if since is not None: 
    288            for i in c[rss.items]: 306            for i in c[rss.items]: 
    289                warn(i[dc.date]) 307                warn(i[dc.date]) 
    290                if i[dc.date] > since: 308                if i[dc.date] > since: 
    291                    out.append(i) 309                    out.append(i) 
    292 310 
    293        return c.rss_items 311        return c.rss_items 
    294   312   
    295    def subscribe(self,feed=None,format=None,interval=None): 313    def subscribe(self,feed=None,format=None,interval=None): 
    296        """subscribe() creates a subscription to a uri. format is either 'rss' or 'rdf'. RDF is assumed if none is specified. Interval is the maximum interval in minutes that a feed should be checked at. It sends polite HTTP requests so don't worry about setting it to a bit more often than you might need. A value in minutes - defaults to 100 minutes.""" 314        """subscribe() creates a subscription to a uri. format is either 'rss' or 'rdf'. RDF is assumed if none is specified. Interval is the maximum interval in minutes that a feed should be checked at. It sends polite HTTP requests so don't worry about setting it to a bit more often than you might need. A value in minutes - defaults to 100 minutes.""" 
    297        from rdfobj import fbox 315        from rdfobj import fbox 
    298        if feed is None: return 316        if feed is None: return 
    299 317 
    300        f = self.model.search(fbox.channel,feed).first() 318        f = self.model.search(fbox.channel,feed).first() 
    301        if f is not None: 319        if f is not None: 
    302            return 320            return 
    303 321 
    304        self.mention("subscribing to "+str(feed)) 322        self.mention("subscribing to "+str(feed)) 
    305 323 
    306        if format is None:  324        if format is None:  
    307            format = fbox.rdf 325            format = fbox.rdf 
    308        elif format == 'rss': 326        elif format == 'rss': 
    309            format = fbox.rss 327            format = fbox.rss 
    310        elif format == 'rdf': 328        elif format == 'rdf': 
    311            format = fbox.rdf 329            format = fbox.rdf 
    312 330 
    313        if interval is None: interval = str(100) 331        if interval is None: interval = str(100) 
    314 332 
    315        ff = self.model.create( fbox.Feed, uri=None ) 333        ff = self.model.create( fbox.Feed, uri=None ) 
    316        ff[fbox.channel] = str(feed) 334        ff[fbox.channel] = str(feed) 
    317        ff[fbox.format] = str(format) 335        ff[fbox.format] = str(format) 
    318        ff[fbox.interval] = interval 336        ff[fbox.interval] = interval 
    319 337 
    320        return ff 338        return ff 
    321 339 
    322    def update(self): 340    def update(self): 
    323        """Causes all the subscribed URLs to be visited for updates.""" 341        """Causes all the subscribed URLs to be visited for updates.""" 
    324        subs = self.subscriptions() 342        subs = self.subscriptions() 
    325        from rdfobj import fbox 343        from rdfobj import fbox 
    326        for s in subs:     344        for s in subs:     
    327            self.visit(s[fbox.channel]) 345            self.visit(s[fbox.channel]) 
    328 346 
    329    def visit(self,uri=None): 347    def visit(self,uri=None): 
    330        """Creates an anonymous object which records a visit that we 348        """Creates an anonymous object which records a visit that we 
    331        paid to a feed, including a counter of times visited. This object is 349        paid to a feed, including a counter of times visited. This object is 
    332        used as a Redland context for all the information collected from a feed 350        used as a Redland context for all the information collected from a feed 
    333        during this visit.""" 351        during this visit.""" 
    334        # redland had problems serialising models with bnode context uris  352        # redland had problems serialising models with bnode context uris  
    335        count = self.counter() 353        count = self.counter() 
    336        from rdfobj import fbox 354        from rdfobj import fbox 
    337        visit_uri = str(fbox.visit)+'/'+str(count) 355        visit_uri = str(fbox.visit)+'/'+str(count) 
    338        visit = self.model.create( fbox.Visit , visit_uri) 356        visit = self.model.create( fbox.Visit , visit_uri) 
    339 357 
    340        visit[fbox.source] = uri 358        visit[fbox.source] = uri 
    341        t = time.strftime("%Y%m%dT%H%M%SZ") 359        t = time.strftime("%Y%m%dT%H%M%SZ") 
    342        visit[fbox.timestamp] = t 360        visit[fbox.timestamp] = t 
    343        return RDF.Node(RDF.Uri(str(visit.uri()))) 361        return RDF.Node(RDF.Uri(str(visit.uri()))) 
    344 362 
    345    def user(self,token=None,nick=None,mbox=None): 363    def user(self,token=None,nick=None,mbox=None): 
    346        """Passed either a user's login token, mbox and name, resolved to mutual exclusion in that order, and returns any corresponding user / foaf:Person object. No security - handle this yourself elsewhere!""" 364        """Passed either a user's login token, mbox and name, resolved to mutual exclusion in that order, and returns any corresponding user / foaf:Person object. No security - handle this yourself elsewhere!""" 
    347        from rdfobj import foaf 365        from rdfobj import foaf 
    348        if token is not None: 366        if token is not None: 
    349            u = self.model.search(foaf.auth_token,token).first() 367            u = self.model.search(foaf.auth_token,token).first() 
    350            return u[foaf.alias] 368            return u[foaf.alias] 
    351 369 
    352        if mbox is not None: 370        if mbox is not None: 
    353            u = self.model.search(foaf.mbox,mbox).first() 371            u = self.model.search(foaf.mbox,mbox).first() 
    354            return u 372            return u 
    355 373 
    356        if nick is not None: 374        if nick is not None: 
    357            users = self.model.search(foaf.name,nick).list() 375            users = self.model.search(foaf.name,nick).list() 
    358            users.append( self.model.search(foaf.givenName,nick).list() ) 376            users.append( self.model.search(foaf.givenName,nick).list() ) 
    359            return users 377            return users 
    360 378 
    361    def add_user(self,nick=None,mbox=None,password=None): 379    def add_user(self,nick=None,mbox=None,password=None): 
    362        """ Create a new user foaf:Person""" 380        """ Create a new user foaf:Person""" 
    363        store = self.store 381        store = self.store 
    364        from rdfobj import foaf, wlan 382        from rdfobj import foaf, wlan 
    365 383 
    366        obj = self.model.create(foaf.Person,uri=uri) 384        obj = self.model.create(foaf.Person,uri=uri) 
    367        obj[foaf.mbox] = mbox 385        obj[foaf.mbox] = mbox 
    368        obj[foaf.nick] = nick 386        obj[foaf.nick] = nick 
    369        if page is not None: obj[foaf.homepage] = page 387        if page is not None: obj[foaf.homepage] = page 
    370        self.obj = obj 388        self.obj = obj 
    371 389 
    372        """ Create a kind of shadow user where we store the password and the logged-in token, so they won't get serialised accidentally along with the user. """ 390        """ Create a kind of shadow user where we store the password and the logged-in token, so they won't get serialised accidentally along with the user. """ 
    373 391 
    374        auth = store.create(foaf.AuthedPerson) 392        auth = store.create(foaf.AuthedPerson) 
    375        auth[foaf.password] = password 393        auth[foaf.password] = password 
    376        auth[foaf.nick] = nick 394        auth[foaf.nick] = nick 
    377        auth[foaf.alias] = obj 395        auth[foaf.alias] = obj 
    378 396 
    379        token = self.auth_token() 397        token = self.auth_token() 
    380        auth[foaf.auth_token] = token 398        auth[foaf.auth_token] = token 
    381        self.model.sync() 399        self.model.sync() 
    382        return token 400        return token 
    383 401 
    384    def auth_token(self): 402    def auth_token(self): 
    385        """Generate a random auth token.""" 403        """Generate a random auth token.""" 
    386        x = '' 404        x = '' 
    387        for  n in range(0, 6): 405        for  n in range(0, 6): 
    388            x = x + chr(65 + random.randint(0, 26)) 406            x = x + chr(65 + random.randint(0, 26)) 
    389        return x         407        return x         
    390         408         
    391    def visit_scheduled(self,sub): 409    def visit_scheduled(self,sub): 
    392        """Compare the last visited time, if that's applicable, to the interval between events (rather than a schedule? perhaps we'll have to re-think this later."""    410        """Compare the last visited time, if that's applicable, to the interval between events (rather than a schedule? perhaps we'll have to re-think this later."""    
    393        if self._visit_true is not None: 411        if self._visit_true is not None: 
    394            return 1 412            return 1 
    395        from rdfobj import fbox 413        from rdfobj import fbox 
    396        last = sub[fbox.last_visited] 414        last = sub[fbox.last_visited] 
    397        if last is None: 415        if last is None: 
    398            return 1 416            return 1 
    399        t = time.time() 417        t = time.time() 
    400        # convert last time simply from ical to epoch? 418        # convert last time simply from ical to epoch? 
    401 419 
    402        return 1 420        return 1 
    403        since = t - float(str(last)) 421        since = t - float(str(last)) 
    404         422         
    405        interval = sub[fbox.interval] 423        interval = sub[fbox.interval] 
    406        if interval is None: 424        if interval is None: 
    407            sub[fbox.interval] = str(100) 425            sub[fbox.interval] = str(100) 
    408            interval = sub[fbox.interval] 426            interval = sub[fbox.interval] 
    409        secs = int(str(interval))*60 427        secs = int(str(interval))*60 
    410        if since >= secs: 428        if since >= secs: 
    411            return 1 429            return 1 
    412        return None 430        return None 
    413                 431                 
    414    def counter(self): 432    def counter(self): 
    415        """Update the counter that's used to generate visit context URIs.""" 433        """Update the counter that's used to generate visit context URIs.""" 
    416        from rdfobj import fbox 434        from rdfobj import fbox 
    417        counter = self.model.fetch(fbox.Visit_Count) 435        counter = self.model.fetch(fbox.Visit_Count) 
    418        c = counter[fbox.count] 436        c = counter[fbox.count] 
    419        c = int(str(c))+1 437        c = int(str(c))+1 
    420        counter[fbox.count] = str(c) 438        counter[fbox.count] = str(c) 
    421        return c 439        return c 
    422 440 
    423 441 
    424if __name__ == "__main__": 442if __name__ == "__main__": 
    425    bbox = BBox(visit_true = 1) 443    bbox = BBox(visit_true = 1) 
    426    bbox.subscribe(feed='http://frot.org/wirelesslondon/bbox.rdf',format=fbox.rss) 444    bbox.subscribe(feed='http://frot.org/wirelesslondon/bbox.rdf',format=fbox.rss) 
    427    bbox.subscribe(feed='http://frot.org/devlog/index.rss',format=fbox.rss) 445    bbox.subscribe(feed='http://frot.org/devlog/index.rss',format=fbox.rss) 
    428    bbox.subscribe(feed='http://zooleika.org.uk/bio/foaf.rdf',format=fbox.rdf) 446    bbox.subscribe(feed='http://zooleika.org.uk/bio/foaf.rdf',format=fbox.rdf) 
    429    bbox.read_subscriptions() 447    bbox.read_subscriptions() 
  • bbox/bbox/spatialStore.py

    Revision 218 Revision 417
    1"""provides the unified data store between redland RDF and spatial (for now PostGIS) index""" 1"""provides the unified data store between redland RDF and spatial (for now PostGIS) index""" 
    2import pgdb 2import pgdb 
    3import re 3import re 
    4import warnings 4import warnings 
    5import bbox.config 5import bbox.config 
    6 6 
    7_utm_zone = bbox.config.utm_zone 7_utm_zone = bbox.config.utm_zone 
    8_srid = bbox.config.srid  8_srid = bbox.config.srid  
    9 9 
    10class SpatialStore: 10class SpatialStore: 
    11 11 
    12    def __init__(self,vars = None,database=None): 12    def __init__(self,vars = None,database=None): 
    13        """Create a new store interface to a certain database name, passed in here or defaulting to what's set in bbox.config._spatialdb""" 13        """Create a new store interface to a certain database name, passed in here or defaulting to what's set in bbox.config._spatialdb""" 
    14        self.vars = vars 14        self.vars = vars 
    15        if database is None:     15        if database is None:     
    16            database = _database 16            database = _database 
    17             17             
    18        db = pgdb.connect(database=database) 18        db = pgdb.connect(database=database) 
    19        self.db = db 19        self.db = db 
    20 20 
    21    def geom(self,uri): 21    def geom(self,uri): 
    22        """Passed the uri of something in the DB, returns geometry for it as a dict; if the geom is a point, {'x':x,'y':y}; if the geom is a line, x1,y1,x2,y2; polygon geom isn't really supported yet.""" 22        """Passed the uri of something in the DB, returns geometry for it as a dict; if the geom is a point, {'x':x,'y':y}; if the geom is a line, x1,y1,x2,y2; polygon geom isn't really supported yet.""" 
    23        # again we have an issue of what kind of geom this is 23        # again we have an issue of what kind of geom this is 
    24        select = "select GeometryType(geom) from nodes where node='"+uri+"'" 24        select = "select GeometryType(geom) from nodes where node='"+uri+"'" 
    25        db = self.db.cursor() 25        db = self.db.cursor() 
    26        db.execute(select) 26        db.execute(select) 
    27        g = db.fetchone() 27        g = db.fetchone() 
    28        geom = {} 28        geom = {} 
    29        if g[0] == 'LINESTRING': 29        if g[0] == 'LINESTRING': 
    30            select = "SELECT name, type, X(StartPoint(geom)) as x, Y(StartPoint(geom)), GeometryFromText(geom) as y, X(EndPoint(geom)), Y(EndPoint(geom)) from nodes where node='"+uri+"'" 30            select = "SELECT name, type, X(StartPoint(geom)) as x, Y(StartPoint(geom)), GeometryFromText(geom) as y, X(EndPoint(geom)), Y(EndPoint(geom)) from nodes where node='"+uri+"'" 
    31            db.execute(select) 31            db.execute(select) 
    32            c = db.fetchone() 32            c = db.fetchone() 
    33            geom['x1'] = c[2] 33            geom['x1'] = c[2] 
    34            geom['x2'] = c[4] 34            geom['x2'] = c[4] 
    35            geom['y1'] = c[3] 35            geom['y1'] = c[3] 
    36            geom['y2'] = c[5] 36            geom['y2'] = c[5] 
    37 37 
    38        elif g[0] == 'POINT': 38        elif g[0] == 'POINT': 
    39            select = "SELECT name,type,X(geom),Y(geom) from nodes where node='"+uri+"'" 39            select = "SELECT name,type,X(geom),Y(geom) from nodes where node='"+uri+"'" 
    40            db.execute(select) 40            db.execute(select) 
    41            c = db.fetchone() 41            c = db.fetchone() 
    42            geom['x'] = c[2] 42            geom['x'] = c[2] 
    43            geom['y'] = c[3] 43            geom['y'] = c[3] 
    44 44 
    45        elif g[0] == 'POLYGON': 45        elif g[0] == 'POLYGON': 
    46            #@@TODO support this properly, as vectors, whatever 46            #@@TODO support this properly, as vectors, whatever 
    47            select = 'SELECT GeometryAsText(geom) where node = \''+uri+"'"       47            select = 'SELECT GeometryAsText(geom) where node = \''+uri+"'"       
    48         48         
    49        return geom 49        return geom 
    50 50 
    51    def add_geom(self,uri=None,name=None,type=None,rdf_type=None,status=None,owner=None,x=None,y=None,z=None,date=None): 51    def add_geom(self,uri=None,name=None,type=None,rdf_type=None,status=None,owner=None,x=None,y=None,z=None,date=None): 
    52        """ Add geometry for a Uri into a spatial index. needs uri and also name=[name] must be supplied.""" 52        """ Add geometry for a Uri into a spatial index. needs uri and also name=[name] must be supplied.""" 
    53        if uri is None: return 53        if uri is None: return 
    54        if type is None: type = '' 54        if type is None: type = '' 
    55         55         
    56        if rdf_type is None: 56        if rdf_type is None: 
    57            rdf_type = 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing' 57            rdf_type = 'http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing' 
    58        if status is None: status = '' 58        if status is None: status = '' 
    59        if owner is None: owner = 'wl@frot.org' 59        if owner is None: owner = 'wl@frot.org' 
    60        if date is None: 60        if date is None: 
    61            date = 'now()' 61            date = 'now()' 
    62        else: 62        else: 
    63            date = "'"+str(date)+"'" 63            date = "'"+str(date)+"'" 
    64         64         
    65        geom = "GeometryFromText('POINT("+str(x)+' '+str(y)+")',"+str(_srid)+")" 65        geom = "GeometryFromText('POINT("+str(x)+' '+str(y)+")',"+str(_srid)+")" 
    66         66         
    67        #insert = "INSERT INTO nodes (node,geom,name,status,rdf_type,type,created) values ('"+str(uri)+"',"+geom+",'"+str(name)+"','"+str(status)+"','"+str(rdf_type)+"','"+str(type)+"',"+date+')' 67        #insert = "INSERT INTO nodes (node,geom,name,status,rdf_type,type,created) values ('"+str(uri)+"',"+geom+",'"+str(name)+"','"+str(status)+"','"+str(rdf_type)+"','"+str(type)+"',"+date+')' 
    68        #print insert    68        #print insert    
    69 69 
    70        db = self.db 70        db = self.db 
    71        db.cursor().execute("INSERT INTO nodes (node,geom,name,status,rdf_type,type,created) values (%s,"+geom+",%s,%s,%s,%s,"+date+")", (str(uri),str(name),str(status),str(rdf_type),str(type))) 71        db.cursor().execute("INSERT INTO nodes (node,geom,name,status,rdf_type,type,created) values (%s,"+geom+",%s,%s,%s,%s,"+date+")", (str(uri),str(name),str(status),str(rdf_type),str(type))) 
    72        db.commit() 72        db.commit() 
    73        #print "new geometry for "+str(uri) 73        #print "new geometry for "+str(uri) 
    74 74 
    75    def find_near(self,node=None, lat=None,lon=None,x=None,y=None,r=None,type=None,terse=None): 75    def find_near(self,node=None, lat=None,lon=None,x=None,y=None,r=None,type=None,terse=None): 
    76        if lat is not None: 76        if lat is not None: 
    77            thing = wl.spatialThing.SpatialThing(spatialStore = self) 77            thing = wl.spatialThing.SpatialThing(spatialStore = self) 
    78            (x,y) = thing.latlon_to_utm(lat=lat,lon=lon) 78            (x,y) = thing.latlon_to_utm(lat=lat,lon=lon) 
    79        if r is None: 79        if r is None: 
    80            r = 1600  80            r = 1600  
    81        found = self.within_box(minx=x-r,miny=y-r,maxx=x+r,maxy=y+r,rdf_type=type,terse=terse) 81        found = self.within_box(minx=x-r,miny=y-r,maxx=x+r,maxy=y+r,rdf_type=type,terse=terse) 
    82        return found 82        return found 
    83     83     
    84    def within_box(self,minx=None,miny=None,maxx=None,maxy=None,type=None,rdf_type=None,terse=None): 84    def within_box(self,minx=None,miny=None,maxx=None,maxy=None,type=None,rdf_type=None,terse=None): 
    85        """Accepts a minx,miny,maxx,maxy bounding box; should return a list of spatialThings, unless you specify terse=1, in whih case you just get a