| 1 | # bbox - an RSS / RDF aggregator | 1 | # bbox - an RSS / RDF aggregator |
|---|
| 2 | # Walsh - Dec 2004 - Mar 2005 | 2 | # Walsh - Dec 2004 - Mar 2005 |
|---|
| 3 | | 3 | |
|---|
| 4 | # This code owes heavily to the approach and source in Edd Dumbill's | 4 | # This code owes heavily to the approach and source in Edd Dumbill's |
|---|
| 5 | # IBM Developerworks article on aggregating RSS with contexts: | 5 | # IBM Developerworks article on aggregating RSS with contexts: |
|---|
| 6 | # http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html | 6 | # http://www-106.ibm.com/developerworks/xml/library/x-rdfprov.html |
|---|
| 7 | | 7 | |
|---|
| 8 | # It uses Mark Pilgrim's feedparser, at http://feedparser.org/ | 8 | # It uses Mark Pilgrim's feedparser, at http://feedparser.org/ |
|---|
| 9 | # This software has 2000 tests. The code is included in this package. | 9 | # This software has 2000 tests. The code is included in this package. |
|---|
| 10 | | 10 | |
|---|
| 11 | # It uses the 'rdfobj', an object interface to the python interface | 11 | # It uses the 'rdfobj', an object interface to the python interface |
|---|
| 12 | # to the redland rdf toolkit. This is also included. | 12 | # to the redland rdf toolkit. This is also included. |
|---|
| 13 | # redland is at http://www.redland.opensource.ac.uk/ | 13 | # redland is at http://www.redland.opensource.ac.uk/ |
|---|
| 14 | | 14 | |
|---|
| 15 | import feedparser | 15 | import feedparser |
|---|
| 16 | import time, datetime | 16 | import time, datetime |
|---|
| 17 | import rdfobj | 17 | import rdfobj |
|---|
| 18 | import RDF | 18 | import RDF |
|---|
| 19 | from bbox.politehttp import polite_request | 19 | from bbox.politehttp import polite_request |
|---|
| 20 | import bbox.spatialStore | 20 | import bbox.spatialStore |
|---|
| 21 | import bbox.config | 21 | import bbox.config |
|---|
| 22 | import os | 22 | import os |
|---|
| 23 | from warnings import warn | 23 | from warnings import warn |
|---|
| 24 | | 24 | |
|---|
| 25 | class BBox: | 25 | class BBox: |
|---|
| 26 | def __init__(self,spatial=None,verbose=None,always_visit=None,db=None,model=None,store=None,textindex=None): | 26 | def __init__(self,spatial=None,verbose=None,always_visit=None,db=None,model=None,store=None,textindex=None): |
|---|
| 27 | | 27 | |
|---|
| 28 | """We initialise a bbox by passing a database to it. | 28 | """We initialise a bbox by passing a database to it. |
|---|
| 29 | If wishing to use an optional spatial index, set | 29 | If wishing to use an optional spatial index, set |
|---|
| 30 | | 30 | |
|---|
| 31 | spatial = [name of index or database] | 31 | spatial = [name of index or database] |
|---|
| 32 | | 32 | |
|---|
| 33 | To create a data model in a specific directory: | 33 | To create a data model in a specific directory: |
|---|
| 34 | | 34 | |
|---|
| 35 | store = /path/to/storage/directory | 35 | store = /path/to/storage/directory |
|---|
| 36 | | 36 | |
|---|
| 37 | To create a data model with a specific name: | 37 | To create a data model with a specific name: |
|---|
| 38 | | 38 | |
|---|
| 39 | db = 'data' | 39 | db = 'data' |
|---|
| 40 | | 40 | |
|---|
| 41 | To have bbox reuse an existing rdfobj.Model, pass it in as | 41 | To have bbox reuse an existing rdfobj.Model, pass it in as |
|---|
| 42 | | 42 | |
|---|
| 43 | model = <rdfobj.Model Object> | 43 | model = <rdfobj.Model Object> |
|---|
| 44 | | 44 | |
|---|
| 45 | Setting 'verbose' to a true value turns on bbox's | 45 | Setting 'verbose' to a true value turns on bbox's |
|---|
| 46 | stream of consciousness. | 46 | stream of consciousness. |
|---|
| 47 | | 47 | |
|---|
| 48 | Setting 'always_visit' to a true value means that | 48 | Setting 'always_visit' to a true value means that |
|---|
| 49 | HTTP requests are always made in full, even if their HTTP | 49 | HTTP requests are always made in full, even if their HTTP |
|---|
| 50 | status code is unchanged (304) and etag/last-modified dates | 50 | status code is unchanged (304) and etag/last-modified dates |
|---|
| 51 | have not changed. (Useful for testing) | 51 | have not changed. (Useful for testing) |
|---|
| 52 | """ | 52 | """ |
|---|
| 53 | | 53 | |
|---|
| 54 | self._verbose = verbose | 54 | self._verbose = verbose |
|---|
| 55 | self._visit_true = always_visit | 55 | self._visit_true = always_visit |
|---|
| 56 | | 56 | |
|---|
| 57 | if db is None: | 57 | if db is None: |
|---|
| 58 | db = bbox.config.db | 58 | db = bbox.config.db |
|---|
| 59 | if store is None: | 59 | if store is None: |
|---|
| 60 | store = bbox.config.store | 60 | store = bbox.config.store |
|---|
| 61 | | 61 | |
|---|
| 62 | if model is not None: | 62 | if model is not None: |
|---|
| 63 | self.model = model | 63 | self.model = model |
|---|
| 64 | else: | 64 | else: |
|---|
| 65 | #if store is not None: | 65 | #if store is not None: |
|---|
| 66 | # os.chdir(store) | 66 | # os.chdir(store) |
|---|
| 67 | self.model = rdfobj.Model(db,db='hash') | 67 | self.model = rdfobj.Model(db,db='hash') |
|---|
| 68 | self.model.load(bbox.config.boot) | 68 | self.model.load(bbox.config.boot) |
|---|
| 69 | | 69 | |
|---|
| 70 | from rdfobj import fbox | 70 | from rdfobj import fbox |
|---|
| 71 | counter = self.model.fetch(fbox.Visit_Count) | 71 | counter = self.model.fetch(fbox.Visit_Count) |
|---|
| 72 | c = counter[fbox.count] | 72 | c = counter[fbox.count] |
|---|
| 73 | if c is None: c = 0 | 73 | if c is None: c = 0 |
|---|
| 74 | c = int(str(c))+1 | 74 | c = int(str(c))+1 |
|---|
| 75 | counter[fbox.count] = str(c) | 75 | counter[fbox.count] = str(c) |
|---|
| 76 | | 76 | |
|---|
| 77 | if counter is None: | 77 | if counter is None: |
|---|
| 78 | v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) | 78 | v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) |
|---|
| 79 | v[fbox.count] = 0 | 79 | v[fbox.count] = 0 |
|---|
| 80 | | 80 | |
|---|
| 81 | if spatial is not None: | 81 | if spatial is not None: |
|---|
| 82 | self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial,model=self.model) | 82 | self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial,model=self.model) |
|---|
| 83 | else: | 83 | else: |
|---|
| 84 | self.spatialStore = None | 84 | self.spatialStore = None |
|---|
| 85 | self.textindex = textindex | 85 | self.textindex = textindex |
|---|
| 86 | | 86 | |
|---|
| 87 | | 87 | |
|---|
| 88 | def mention(self,thought): | 88 | def mention(self,thought): |
|---|
| 89 | """If BBox is constructed with verbose=1, | 89 | """If BBox is constructed with verbose=1, |
|---|
| 90 | prints to STDOUT (currently) a record of what it's up to.""" | 90 | prints to STDOUT (currently) a record of what it's up to.""" |
|---|
| 91 | if self._verbose: | 91 | if self._verbose: |
|---|
| 92 | print(thought) | 92 | print(thought) |
|---|
| 93 | | 93 | |
|---|
| 94 | def read_subscriptions(self): | 94 | def read_subscriptions(self): |
|---|
| 95 | """read_subscriptions() picks up the latest RSS feed updates, | 95 | """read_subscriptions() picks up the latest RSS feed updates, |
|---|
| 96 | for every object of class fbox:Feed""" | 96 | for every object of class fbox:Feed""" |
|---|
| 97 | self.mention("checking subscriptions.") | 97 | self.mention("checking subscriptions.") |
|---|
| 98 | subs = self.subscriptions() | 98 | subs = self.subscriptions() |
|---|
| 99 | from rdfobj import fbox | 99 | from rdfobj import fbox |
|---|
| 100 | for s in subs: | 100 | for s in subs: |
|---|
| 101 | self.mention("reading "+str(s[fbox.channel])) | 101 | self.mention("reading "+str(s[fbox.channel])) |
|---|
| 102 | format = s[fbox.format].uri() | 102 | format = s[fbox.format].uri() |
|---|
| 103 | c = s[fbox.channel] | 103 | c = s[fbox.channel] |
|---|
| 104 | | 104 | |
|---|
| 105 | # see if we're actually due a visit | 105 | # see if we're actually due a visit |
|---|
| 106 | due = self.visit_scheduled(s) | 106 | due = self.visit_scheduled(s) |
|---|
| 107 | if due is None: | 107 | if due is None: |
|---|
| 108 | print "nothing due to look at!" | 108 | print "nothing due to look at!" |
|---|
| 109 | subs.next() | 109 | subs.next() |
|---|
| 110 | else: | 110 | else: |
|---|
| 111 | if format == fbox.rss: | 111 | if format == fbox.rss: |
|---|
| 112 | self.read_rss(c.uri(),subscription=s) | 112 | self.read_rss(c.uri(),subscription=s) |
|---|
| 113 | elif format == fbox.rdf: | 113 | elif format == fbox.rdf: |
|---|
| 114 | self.read_rdf(s[fbox.channel].uri(),subscription=s) | 114 | self.read_rdf(s[fbox.channel].uri(),subscription=s) |
|---|
| 115 | | 115 | |
|---|
| 116 | def read_rss(self,uri,context=None,subscription=None,xml=None,properties=None): | 116 | def read_rss(self,uri,context=None,subscription=None,xml=None,properties=None): |
|---|
| 117 | """Read updates from an RSS feed, | 117 | """Read updates from an RSS feed, |
|---|
| 118 | and construct a local copy of new objects found. | 118 | and construct a local copy of new objects found. |
|---|
| 119 | If a subscription object is passed in, | 119 | If a subscription object is passed in, |
|---|
| 120 | update it with HTTP status information | 120 | update it with HTTP status information |
|---|
| 121 | If spatial things are found, add them to the spatial index. | 121 | If spatial things are found, add them to the spatial index. |
|---|
| 122 | Returns a list of objects that were found at this URL | 122 | Returns a list of objects that were found at this URL |
|---|
| 123 | | 123 | |
|---|
| 124 | 'properties' is a dictionary of properties which, | 124 | 'properties' is a dictionary of properties which, |
|---|
| 125 | if found on an individual rss item, should be mapped to | 125 | if found on an individual rss item, should be mapped to |
|---|
| 126 | an rdf property on the outcoming object. | 126 | an rdf property on the outcoming object. |
|---|
| 127 | e.g. 'media_content':rss.link | 127 | e.g. 'media_content':rss.link |
|---|
| 128 | """ | 128 | """ |
|---|
| 129 | | 129 | |
|---|
| 130 | | 130 | |
|---|
| 131 | from rdfobj import rss, rdf, fbox, ical, dc, foaf, geo | 131 | from rdfobj import rss, rdf, fbox, ical, dc, foaf, geo |
|---|
| 132 | #if subscription is None: subscription = {} | 132 | #if subscription is None: subscription = {} |
|---|
| 133 | result = self.politely_get_uri(uri,subscription=subscription) | 133 | result = self.politely_get_uri(uri,subscription=subscription) |
|---|
| 134 | | 134 | |
|---|
| 135 | channel = self.model.fetch(uri) | 135 | channel = self.model.fetch(uri) |
|---|
| 136 | """If we got a feed object back from the request, then create a | 136 | """If we got a feed object back from the request, then create a |
|---|
| 137 | context for this visit to the feed, and store the entries that we | 137 | context for this visit to the feed, and store the entries that we |
|---|
| 138 | collected from it.""" | 138 | collected from it.""" |
|---|
| 139 | if self._visit_true: | 139 | if self._visit_true: |
|---|
| 140 | pass | 140 | pass |
|---|
| 141 | elif result['status'] != 200: | 141 | elif result['status'] != 200: |
|---|
| 142 | return [] | 142 | return [] |
|---|
| 143 | items = [] | 143 | items = [] |
|---|
| 144 | feed = feedparser.parse(result['data']) | 144 | feed = feedparser.parse(result['data']) |
|---|
| 145 | if feed.has_key('feed'): | 145 | if feed.has_key('feed'): |
|---|
| 146 | context = self.visit(uri) | 146 | context = self.visit(uri) |
|---|
| 147 | # existence of exact duplicates? | 147 | # existence of exact duplicates? |
|---|
| 148 | | 148 | |
|---|
| 149 | for e in feed.entries: | 149 | for e in feed.entries: |
|---|
| 150 | link = str(e.link) | 150 | link = str(e.link) |
|---|
| 151 | title = None | 151 | title = None |
|---|
| 152 | item = self.model.create( rss.item, uri=link, context = context ) | 152 | item = self.model.create( rss.item, uri=link, context = context ) |
|---|
| 153 | | 153 | |
|---|
| 154 | if e.has_key('summary'): item[rss.description] = str(e.summary) | 154 | if e.has_key('summary'): item[rss.description] = str(e.summary) |
|---|
| 155 | | 155 | |
|---|
| 156 | if e.has_key('content'): item[rss.description] = str(e.content) | 156 | if e.has_key('content'): item[rss.description] = str(e.content) |
|---|
| 157 | | 157 | |
|---|
| 158 | if e.has_key('title'): | 158 | if e.has_key('title'): |
|---|
| 159 | item[rss.title] = str(e.title) | 159 | item[rss.title] = str(e.title) |
|---|
| 160 | title = str(e.title) | 160 | title = str(e.title) |
|---|
| 161 | | 161 | |
|---|
| 162 | item[fbox.channel] = channel | 162 | item[fbox.channel] = channel |
|---|
| 163 | | 163 | |
|---|
| 164 | # d.entries[0].modified_parsed is common | 164 | # d.entries[0].modified_parsed is common |
|---|
| 165 | | 165 | |
|---|
| 166 | time_tuple = None | 166 | time_tuple = None |
|---|
| 167 | if e.has_key('modified_parsed'): | 167 | if e.has_key('modified_parsed'): |
|---|
| 168 | time_tuple = e.modified_parsed | 168 | time_tuple = e.modified_parsed |
|---|
| 169 | elif e.has_key('created_parsed'): | 169 | elif e.has_key('created_parsed'): |
|---|
| 170 | time_tuple = e.created_parsed | 170 | time_tuple = e.created_parsed |
|---|
| 171 | | 171 | |
|---|
| 172 | # item[ical.datetime] = some process with time_tuple and strftime | 172 | # item[ical.datetime] = some process with time_tuple and strftime |
|---|
| 173 | # d = datetime.datetime(time_tuple) | 173 | # d = datetime.datetime(time_tuple) |
|---|
| 174 | # ical_date = ical_datetime.datetime_to_string(d) | 174 | # ical_date = ical_datetime.datetime_to_string(d) |
|---|
| 175 | # print ical_date | 175 | # print ical_date |
|---|
| 176 | # item[ical.datetime] = ical_date | 176 | # item[ical.datetime] = ical_date |
|---|
| 177 | # not much use without a timestamp | 177 | # not much use without a timestamp |
|---|
| 178 | | 178 | |
|---|
| 179 | if time_tuple is None: | 179 | if time_tuple is None: |
|---|
| 180 | continue | 180 | continue |
|---|
| 181 | ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) | 181 | ical_enough = time.strftime("%Y%m%dT%H%M%SZ",time_tuple) |
|---|
| 182 | item[ical.datetime] = ical_enough | 182 | item[ical.datetime] = ical_enough |
|---|
| 183 | | 183 | |
|---|
| 184 | rdf_type = None | 184 | rdf_type = None |
|---|
| 185 | if e.has_key('rdf_type'): | 185 | if e.has_key('rdf_type'): |
|---|
| 186 | rdf_type = str(e['rdf_type']) | 186 | rdf_type = str(e['rdf_type']) |
|---|
| 187 | item[rdf.type] = rdf_type | 187 | item[rdf.type] = rdf_type |
|---|
| 188 | | 188 | |
|---|
| 189 | if e.has_key('geo_lat') and e.has_key('geo_long'): | 189 | if e.has_key('geo_lat') and e.has_key('geo_long'): |
|---|
| 190 | lat = str(e['geo_lat']) | 190 | lat = str(e['geo_lat']) |
|---|
| 191 | long = str(e['geo_long']) | 191 | long = str(e['geo_long']) |
|---|
| 192 | item[geo.lat] = lat | 192 | item[geo.lat] = lat |
|---|
| 193 | item[geo.long] = long | 193 | item[geo.long] = long |
|---|
| 194 | | 194 | |
|---|
| 195 | """Update the spatial index, if we have one.""" | 195 | """Update the spatial index, if we have one.""" |
|---|
| 196 | if self.spatialStore is not None: | 196 | if self.spatialStore is not None: |
|---|
| 197 | self.spatialStore.add_or_update_point(uri,type=rdf_type,name=title,x=long,y=lat) | 197 | self.spatialStore.add_or_update_point(uri,type=rdf_type,name=title,x=long,y=lat) |
|---|
| 198 | elif e.has_key('geo_line'): | 198 | elif e.has_key('geo_line'): |
|---|
| 199 | line = str(e['geo:line']) | 199 | line = str(e['geo:line']) |
|---|
| 200 | item[geo.line] = line | 200 | item[geo.line] = line |
|---|
| 201 | if self.spatialStore is not None: | 201 | if self.spatialStore is not None: |
|---|
| 202 | points = self.spatialStore.parse_points(line) | 202 | points = self.spatialStore.parse_points(line) |
|---|
| 203 | self.spatialStore.add_or_update_line(uri,points=points,type=rdf_type) | 203 | self.spatialStore.add_or_update_line(uri,points=points,type=rdf_type) |
|---|
| 204 | | 204 | |
|---|
| 205 | elif e.has_key('geo_polygon'): | 205 | elif e.has_key('geo_polygon'): |
|---|
| 206 | poly = str(e['geo:polygon']) | 206 | poly = str(e['geo:polygon']) |
|---|
| 207 | item[geo.polygon] = poly | 207 | item[geo.polygon] = poly |
|---|
| 208 | if self.spatialStore is not None: | 208 | if self.spatialStore is not None: |
|---|
| 209 | points = self.spatialStore.parse_points(poly) | 209 | points = self.spatialStore.parse_points(poly) |
|---|
| 210 | self.spatialStore.add_or_update_polygon(uri,points=points,type=rdf_type) | 210 | self.spatialStore.add_or_update_polygon(uri,points=points,type=rdf_type) |
|---|
| 211 | | 211 | |
|---|
| 212 | # add the optional rdf properties to new items | 212 | # add the optional rdf properties to new items |
|---|
| 213 | if properties is not None: | 213 | if properties is not None: |
|---|
| 214 | for k in properties.keys(): | 214 | for k in properties.keys(): |
|---|
| 215 | if e.has_key(k): | 215 | if e.has_key(k): |
|---|
| 216 | item[properties[k]] = e[k] | 216 | item[properties[k]] = e[k] |
|---|
| 217 | | 217 | |
|---|
| 218 | items.append(item) | 218 | items.append(item) |
|---|
| 219 | return items | 219 | return items |
|---|
| 220 | | 220 | |
|---|
| 221 | def read_rdf(self,uri,subscription=None,xml=None): | 221 | def read_rdf(self,uri,subscription=None,xml=None): |
|---|
| 222 | """Read updates from an RDF URL. | 222 | """Read updates from an RDF URL. |
|---|
| 223 | If a subscription object is passed in, | 223 | If a subscription object is passed in, |
|---|
| 224 | update it with HTTP status information | 224 | update it with HTTP status information |
|---|
| 225 | If spatial things are found, add them to the spatial index. | 225 | If spatial things are found, add them to the spatial index. |
|---|
| 226 | Returns a list of objects that were found at this URL.""" | 226 | Returns a list of objects that were found at this URL.""" |
|---|
| 227 | | 227 | |
|---|
| 228 | from rdfobj import geo, dc, rdf | 228 | from rdfobj import geo, dc, rdf |
|---|
| 229 | | 229 | |
|---|
| 230 | result = self.politely_get_uri(uri,subscription=subscription) | 230 | result = self.politely_get_uri(uri,subscription=subscription) |
|---|
| 231 | if self._visit_true: | 231 | if self._visit_true: |
|---|
| 232 | pass | 232 | pass |
|---|
| 233 | elif result['status'] != 200: | 233 | elif result['status'] != 200: |
|---|
| 234 | return | 234 | return |
|---|
| 235 | | 235 | |
|---|
| 236 | context = self.visit(uri) | 236 | context = self.visit(uri) |
|---|
| 237 | | 237 | |
|---|
| 238 | # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... | 238 | # we can't just use load() because we want the visit context, and to search for spatial things and index them while we're parsing... |
|---|
| 239 | lats = longs = titles = types = {} | 239 | lats = longs = titles = types = {} |
|---|
| 240 | lines = shapes = [] | 240 | lines = shapes = [] |
|---|
| 241 | parser = RDF.Parser('raptor') | 241 | parser = RDF.Parser('raptor') |
|---|
| 242 | try: | 242 | try: |
|---|
| 243 | stream = parser.parse_as_stream(RDF.Uri(uri)) | 243 | stream = parser.parse_as_stream(RDF.Uri(uri)) |
|---|
| 244 | except: | 244 | except: |
|---|
| 245 | return [] | 245 | return [] |
|---|
| 246 | subjects = {} | 246 | subjects = {} |
|---|
| 247 | if stream: | 247 | if stream: |
|---|
| 248 | while not stream.end(): | 248 | while not stream.end(): |
|---|
| 249 | s = stream.current() | 249 | s = stream.current() |
|---|
| 250 | subjects[s.subject] = 1 | 250 | subjects[s.subject] = 1 |
|---|
| 251 | # check for statement existence | 251 | # check for statement existence |
|---|
| 252 | exists = None | 252 | exists = None |
|---|
| 254 | self.model.model.add_statement(s,context) | 254 | self.model.model.add_statement(s,context) |
|---|
| 255 | # pls don't blame me, i just want to get something working fast | 255 | # pls don't blame me, i just want to get something working fast |
|---|
| 256 | if self.spatialStore is not None: | 256 | if self.spatialStore is not None: |
|---|
| 257 | if s.predicate == RDF.Node(uri_string=str(geo.lat)): | 257 | if s.predicate == RDF.Node(uri_string=str(geo.lat)): |
|---|
| 258 | lats[str(s.subject)] = str(s.object) | 258 | lats[str(s.subject)] = str(s.object) |
|---|
| 259 | | 259 | |
|---|
| 260 | elif s.predicate == RDF.Node(uri_string=str(geo.long)): | 260 | elif s.predicate == RDF.Node(uri_string=str(geo.long)): |
|---|
| 261 | longs[str(s.subject)] = str(s.object) | 261 | longs[str(s.subject)] = str(s.object) |
|---|
| 262 | | 262 | |
|---|
| 263 | elif s.predicate == RDF.Node(uri_string=str(geo.line)): | 263 | elif s.predicate == RDF.Node(uri_string=str(geo.line)): |
|---|
| 264 | lines.append( ( str(s.subject),str(s.object) ) ) | 264 | lines.append( ( str(s.subject),str(s.object) ) ) |
|---|
| 265 | | 265 | |
|---|
| 266 | elif s.predicate == RDF.Node(uri_string=str(geo.polygon)): | 266 | elif s.predicate == RDF.Node(uri_string=str(geo.polygon)): |
|---|
| 267 | shapes.append( ( str(s.subject),str(s.object) ) ) | 267 | shapes.append( ( str(s.subject),str(s.object) ) ) |
|---|
| 268 | | 268 | |
|---|
| 269 | elif s.predicate == RDF.Node(uri_string=str(dc.title)): | 269 | elif s.predicate == RDF.Node(uri_string=str(dc.title)): |
|---|
| 270 | titles[str(s.subject)] = str(s.object) | 270 | titles[str(s.subject)] = str(s.object) |
|---|
| 271 | | 271 | |
|---|
| 272 | elif s.predicate == RDF.Node(uri_string=str(rdf.type)): | 272 | elif s.predicate == RDF.Node(uri_string=str(rdf.type)): |
|---|
| 273 | types[str(s.subject)] = str(s.object) | 273 | types[str(s.subject)] = str(s.object) |
|---|
| 274 | | 274 | |
|---|
| 275 | | 275 | |
|---|
| 276 | stream.next() | 276 | stream.next() |
|---|
| 277 | objects = [] | 277 | objects = [] |
|---|
| 278 | for s in subjects.keys(): | 278 | for s in subjects.keys(): |
|---|
| 279 | objects.append(self.model.fetch(s)) | 279 | objects.append(self.model.fetch(s)) |
|---|
| 280 | | 280 | |
|---|
| 281 | if self.spatialStore is not None: | 281 | if self.spatialStore is not None: |
|---|
| 282 | for k in lats.keys(): | 282 | for k in lats.keys(): |
|---|
| 283 | lat = lats[k] | 283 | lat = lats[k] |
|---|
| 284 | long = longs[k] | 284 | long = longs[k] |
|---|
| 285 | title = None | 285 | title = None |
|---|
| 286 | type = None | 286 | type = None |
|---|
| 287 | if titles.has_key(k): | 287 | if titles.has_key(k): |
|---|
| 288 | title = titles[k] | 288 | title = titles[k] |
|---|
| 289 | if types.has_key(k): | 289 | if types.has_key(k): |
|---|
| 290 | type = types[k] | 290 | type = types[k] |
|---|
| 291 | self.spatialStore.add_or_update_point(uri,type=type,name=title,x=long,y=lat) | 291 | self.spatialStore.add_or_update_point(uri,type=type,name=title,x=long,y=lat) |
|---|
| 292 | for l in lines: | 292 | for l in lines: |
|---|
| 293 | type = types[l[0]] | 293 | type = types[l[0]] |
|---|
| 294 | points = self.spatialStore.parse_points(l[1]) | 294 | points = self.spatialStore.parse_points(l[1]) |
|---|
| 295 | self.spatialStore.add_or_update_line(l[0],points=points,type=type) | 295 | self.spatialStore.add_or_update_line(l[0],points=points,type=type) |
|---|
| 296 | for l in shapes: | 296 | for l in shapes: |
|---|
| 297 | type = types[l[0]] | 297 | type = types[l[0]] |
|---|
| 298 | points = self.spatialStore.parse_points(l[1]) | 298 | points = self.spatialStore.parse_points(l[1]) |
|---|
| 299 | self.spatialStore.add_or_update_polygon(l[0],points=points,type=type) | 299 | self.spatialStore.add_or_update_polygon(l[0],points=points,type=type) |
|---|
| 300 | | 300 | |
|---|
| 301 | if self.textindex is not None: | 301 | if self.textindex is not None: |
|---|
| 302 | for o in objects: | 302 | for o in objects: |
|---|
| 303 | schema = o.rdf_type | 303 | schema = o.rdf_type |
|---|
| 304 | if schema is not None: | 304 | if schema is not None: |
|---|
| 305 | self.textindex.text_index(schema,o) | 305 | self.textindex.text_index(schema,o) |
|---|
| 306 | | 306 | |
|---|
| 307 | return objects | 307 | return objects |
|---|
| 308 | | 308 | |
|---|
| 309 | def politely_get_uri(self,uri,subscription=None): | 309 | def politely_get_uri(self,uri,subscription=None): |
|---|
| 310 | """Request a copy of the document at a url, | 310 | """Request a copy of the document at a url, |
|---|
| 311 | first checking that it has changed since what we record as | 311 | first checking that it has changed since what we record as |
|---|
| 312 | last-modified and the last etag that we have for it. | 312 | last-modified and the last etag that we have for it. |
|---|
| 313 | | 313 | |
|---|
| 314 | If a 'subscription' object is passed in, it gets | 314 | If a 'subscription' object is passed in, it gets |
|---|
| 315 | a new set of properties depending on the HTTP responses. | 315 | a new set of properties depending on the HTTP responses. |
|---|
| 316 | | 316 | |
|---|
| 317 | subscription[fbox.http_status] = str(result['status']) | 317 | subscription[fbox.http_status] = str(result['status']) |
|---|
| 318 | subscription[fbox.last_etag] = result['etag'] | 318 | subscription[fbox.last_etag] = result['etag'] |
|---|
| 319 | subscription[fbox.last_modified] = result['lastmodified'] | 319 | subscription[fbox.last_modified] = result['lastmodified'] |
|---|
| 320 | subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ") | 320 | subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ") |
|---|
| 321 | """ | 321 | """ |
|---|
| 322 | | 322 | |
|---|
| 323 | # we should deal with etag/last-mod politely here too @@TODO | 323 | # we should deal with etag/last-mod politely here too @@TODO |
|---|
| 324 | #visit = self.visit(uri) | 324 | #visit = self.visit(uri) |
|---|
| 325 | result = None | 325 | result = None |
|---|
| 326 | from rdfobj import fbox | 326 | from rdfobj import fbox |
|---|
| 327 | if subscription is None: | 327 | if subscription is None: |
|---|
| 328 | # we might just be using the parser without the context management | 328 | # we might just be using the parser without the context management |
|---|
| 329 | result = polite_request(str(uri)) | 329 | result = polite_request(str(uri)) |
|---|
| 330 | subscription = {'fake':1} | 330 | subscription = {'fake':1} |
|---|
| 331 | | 331 | |
|---|
| 332 | elif self._visit_true is not None: | 332 | elif self._visit_true is not None: |
|---|
| 333 | # we might always want to read the feed content (for debugging reasons) | 333 | # we might always want to read the feed content (for debugging reasons) |
|---|
| 334 | result = polite_request(str(uri)) | 334 | result = polite_request(str(uri)) |
|---|
| 335 | | 335 | |
|---|
| 336 | elif subscription[fbox.last_etag] is not None: | 336 | elif subscription[fbox.last_etag] is not None: |
|---|
| 337 | result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) | 337 | result = polite_request(str(uri),etag=str(subscription[fbox.last_etag])) |
|---|
| 338 | elif subscription[fbox.last_modified] is not None: | 338 | elif subscription[fbox.last_modified] is not None: |
|---|
| 339 | result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) | 339 | result = polite_request(str(uri),last_modified=str(subscription[fbox.last_modified])) |
|---|
| 340 | else: result = polite_request(str(uri)) | 340 | else: result = polite_request(str(uri)) |
|---|
| 341 | if result is None: | 341 | if result is None: |
|---|
| 342 | result = {'status':404} | 342 | result = {'status':404} |
|---|
| 343 | return result | 343 | return result |
|---|
| 344 | | 344 | |
|---|
| 345 | if result.has_key('status'): | 345 | if result.has_key('status'): |
|---|
| 346 | # this was a HTTP request | 346 | # this was a HTTP request |
|---|
| 347 | self.mention("received response: "+str(result['status'])) | 347 | self.mention("received response: "+str(result['status'])) |
|---|
| 348 | | 348 | |
|---|
| 349 | """Take actions about other kinds of HTTP statuses.(TODO)""" | 349 | """Take actions about other kinds of HTTP statuses.(TODO)""" |
|---|
| 350 | # handling different HTTP statuses. | 350 | # handling different HTTP statuses. |
|---|
| 351 | | 351 | |
|---|
| 352 | if subscription.has_key('fake'): | 352 | if subscription.has_key('fake'): |
|---|
| 353 | pass | 353 | pass |
|---|
| 354 | else: | 354 | else: |
|---|
| 355 | subscription[fbox.http_status] = str(result['status']) | 355 | subscription[fbox.http_status] = str(result['status']) |
|---|
| 356 | subscription[fbox.last_etag] = result['etag'] | 356 | subscription[fbox.last_etag] = result['etag'] |
|---|
| 357 | subscription[fbox.last_modified] = result['lastmodified'] | 357 | subscription[fbox.last_modified] = result['lastmodified'] |
|---|
| 358 | subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ") | 358 | subscription[fbox.last_visited] = time.strftime("%Y%m%dT%H%M%SZ") |
|---|
| 359 | | 359 | |
|---|
| 360 | # a 'file:/' uri will only have result['data'] | 360 | # a 'file:/' uri will only have result['data'] |
|---|
| 361 | elif result['data'] is not None: | 361 | elif result['data'] is not None: |
|---|
| 362 | # pretend we have a positive HTTP status | 362 | # pretend we have a positive HTTP status |
|---|
| 363 | result['status'] = 200 | 363 | result['status'] = 200 |
|---|
| 364 | | 364 | |
|---|
| 365 | return result | 365 | return result |
|---|
| 366 | | 366 | |
|---|
| 367 | def subscriptions(self): | 367 | def subscriptions(self): |
|---|
| 368 | """Returns a list (Iterator type) of the URLs at which | 368 | """Returns a list (Iterator type) of the URLs at which |
|---|
| 369 | there is a feed that we are subscribed to (fbox:Feed type)""" | 369 | there is a feed that we are subscribed to (fbox:Feed type)""" |
|---|
| 370 | from rdfobj import fbox, rdf | 370 | from rdfobj import fbox, rdf |
|---|
| 371 | # workaround, as we want a list, not an Iterator which is sometimes empty | 371 | # workaround, as we want a list, not an Iterator which is sometimes empty |
|---|
| 372 | subs = self.model.search(rdf.type,fbox.Feed).list() | 372 | subs = self.model.search(rdf.type,fbox.Feed).list() |
|---|
| 373 | return subs | 373 | return subs |
|---|
| 374 | | 374 | |
|---|
| 375 | def subscription(self,uri): | 375 | def subscription(self,uri): |
|---|
| 376 | """Given a uri, returns the rdfobj which is the | 376 | """Given a uri, returns the rdfobj which is the |
|---|
| 377 | subscription it represents.""" | 377 | subscription it represents.""" |
|---|
| 378 | obj = self.model.fetch(uri) | 378 | obj = self.model.fetch(uri) |
|---|
| 379 | return obj | 379 | return obj |
|---|
| 380 | | 380 | |
|---|
| 381 | def items(self,uri,since=None,until=None): | 381 | def items(self,uri,since=None,until=None): |
|---|
| 382 | """Get items from a feed, optionally filtering by date. | 382 | """Get items from a feed, optionally filtering by date. |
|---|
| 383 | not completely implemented)""" | 383 | not completely implemented)""" |
|---|
| 384 | from rdfobj import fbox, dc, rss | 384 | from rdfobj import fbox, dc, rss |
|---|
| 385 | s = self.subscription(uri) | 385 | s = self.subscription(uri) |
|---|
| 386 | c = s[fbox.channel] | 386 | c = s[fbox.channel] |
|---|
| 387 | out = [] | 387 | out = [] |
|---|
| 388 | if since is not None: | 388 | if since is not None: |
|---|
| 389 | for i in c[rss.items]: | 389 | for i in c[rss.items]: |
|---|
| 390 | if i[dc.date] > since: | 390 | if i[dc.date] > since: |
|---|
| 391 | out.append(i) | 391 | out.append(i) |
|---|
| 392 | | 392 | |
|---|
| 393 | return c.rss_items | 393 | return c.rss_items |
|---|
| 394 | | 394 | |
|---|
| 395 | def subscribe(self,feed=None,format=None,interval=None): | 395 | def subscribe(self,feed=None,format=None,interval=None): |
|---|
| 396 | """subscribe() creates a subscription to a uri. | 396 | """subscribe() creates a subscription to a uri. |
|---|
| 397 | format] is either 'rss' or 'rdf', | 397 | format] is either 'rss' or 'rdf', |
|---|
| 398 | to be handled by feedparser in the read_rss() method | 398 | to be handled by feedparser in the read_rss() method |
|---|
| 399 | or by raptor in the read_rdf() method. | 399 | or by raptor in the read_rdf() method. |
|---|
| 400 | RDF is assumed if no format is specified. | 400 | RDF is assumed if no format is specified. |
|---|
| 401 | | 401 | |
|---|
| 402 | Interval is the maximum interval in minutes | 402 | Interval is the maximum interval in minutes |
|---|
| 403 | that a feed should be checked at. | 403 | that a feed should be checked at. |
|---|
| 404 | bbox sends polite HTTP requests so don't worry about | 404 | bbox sends polite HTTP requests so don't worry about |
|---|
| 405 | interval is value in minutes - defaults to 100 minutes.""" | 405 | interval is value in minutes - defaults to 100 minutes.""" |
|---|
| 406 | | 406 | |
|---|
| 407 | from rdfobj import fbox | 407 | from rdfobj import fbox |
|---|
| 408 | if feed is None: return | 408 | if feed is None: return |
|---|
| 409 | | 409 | |
|---|
| 410 | f = self.model.search(fbox.channel,feed).first() | 410 | f = self.model.search(fbox.channel,feed).first() |
|---|
| 411 | if f is not None: | 411 | if f is not None: |
|---|
| 412 | return | 412 | return |
|---|
| 413 | | 413 | |
|---|
| 414 | self.mention("subscribing to "+str(feed)) | 414 | self.mention("subscribing to "+str(feed)) |
|---|
| 415 | | 415 | |
|---|
| 416 | if format is None: | 416 | if format is None: |
|---|
| 417 | format = fbox.rdf | 417 | format = fbox.rdf |
|---|
| 418 | elif format == 'rss': | 418 | elif format == 'rss': |
|---|
| 419 | format = fbox.rss | 419 | format = fbox.rss |
|---|
| 420 | elif format == 'rdf': | 420 | elif format == 'rdf': |
|---|
| 421 | format = fbox.rdf | 421 | format = fbox.rdf |
|---|
| 422 | | 422 | |
|---|
| 423 | if interval is None: interval = str(100) | 423 | if interval is None: interval = str(100) |
|---|
| 424 | | 424 | |
|---|
| 425 | ff = self.model.create( fbox.Feed, uri=None ) | 425 | ff = self.model.create( fbox.Feed, uri=None ) |
|---|
| 426 | ff[fbox.channel] = str(feed) | 426 | ff[fbox.channel] = str(feed) |
|---|
| 427 | ff[fbox.format] = str(format) | 427 | ff[fbox.format] = str(format) |
|---|
| 428 | ff[fbox.interval] = interval | 428 | ff[fbox.interval] = interval |
|---|
| 429 | | 429 | |
|---|
| 430 | return ff | 430 | return ff |
|---|
| 431 | | 431 | |
|---|
| 432 | def update(self): | 432 | def update(self): |
|---|
| 433 | """Causes all the subscribed URLs to be visited for updates.""" | 433 | """Causes all the subscribed URLs to be visited for updates.""" |
|---|
| 434 | subs = self.subscriptions() | 434 | subs = self.subscriptions() |
|---|
| 435 | from rdfobj import fbox | 435 | from rdfobj import fbox |
|---|
| 436 | for s in subs: | 436 | for s in subs: |
|---|
| 437 | self.visit(s[fbox.channel]) | 437 | self.visit(s[fbox.channel]) |
|---|
| 438 | | 438 | |
|---|
| 439 | def visit(self,uri=None): | 439 | def visit(self,uri=None): |
|---|
| 440 | """Creates an anonymous object which records a visit | 440 | """Creates an anonymous object which records a visit |
|---|
| 441 | that we paid to a feed, including a counter of times visited. | 441 | that we paid to a feed, including a counter of times visited. |
|---|
| 442 | This object is used as a Redland context for all the | 442 | This object is used as a Redland context for all the |
|---|
| 443 | information collected from a feed during this visit.""" | 443 | information collected from a feed during this visit.""" |
|---|
| 444 | # redland had problems serialising models with bnode context uris | 444 | # redland had problems serialising models with bnode context uris |
|---|
| 445 | count = self.counter() | 445 | count = self.counter() |
|---|
| 446 | from rdfobj import fbox | 446 | from rdfobj import fbox |
|---|
| 447 | visit_uri = str(fbox.visit)+'/'+str(count) | 447 | visit_uri = str(fbox.visit)+'/'+str(count) |
|---|
| 448 | visit = self.model.create( fbox.Visit , visit_uri) | 448 | visit = self.model.create( fbox.Visit , visit_uri) |
|---|
| 449 | | 449 | |
|---|
| 450 | visit[fbox.source] = uri | 450 | visit[fbox.source] = uri |
|---|
| 451 | t = time.strftime("%Y%m%dT%H%M%SZ") | 451 | t = time.strftime("%Y%m%dT%H%M%SZ") |
|---|
| 452 | visit[fbox.timestamp] = t | 452 | visit[fbox.timestamp] = t |
|---|
| 453 | return RDF.Node(RDF.Uri(str(visit.uri()))) | 453 | return RDF.Node(RDF.Uri(str(visit.uri()))) |
|---|
| 454 | | 454 | |
|---|
| 455 | def user(self,token=None,nick=None,mbox=None): | 455 | def user(self,token=None,nick=None,mbox=None): |
|---|
| 456 | """Passed either a user's login token, mbox and name, | 456 | """Passed either a user's login token, mbox and name, |
|---|
| 457 | resolved to mutual exclusion in that order, | 457 | resolved to mutual exclusion in that order, |
|---|
| 458 | and returns any corresponding user / foaf:Person object. | 458 | and returns any corresponding user / foaf:Person object. |
|---|
| 459 | No security - handle this yourself elsewhere!""" | 459 | No security - handle this yourself elsewhere!""" |
|---|
| 460 | from rdfobj import foaf | 460 | from rdfobj import foaf |
|---|
| 461 | if token is not None: | 461 | if token is not None: |
|---|
| 462 | u = self.model.search(foaf.auth_token,token).first() | 462 | u = self.model.search(foaf.auth_token,token).first() |
|---|
| 463 | return u[foaf.alias] | 463 | return u[foaf.alias] |
|---|
| 464 | | 464 | |
|---|
| 465 | if mbox is not None: | 465 | if mbox is not None: |
|---|
| 466 | u = self.model.search(foaf.mbox,mbox).first() | 466 | u = self.model.search(foaf.mbox,mbox).first() |
|---|
| 467 | return u | 467 | return u |
|---|
| 468 | | 468 | |
|---|
| 469 | if nick is not None: | 469 | if nick is not None: |
|---|
| 470 | users = self.model.search(foaf.name,nick).list() | 470 | users = self.model.search(foaf.name,nick).list() |
|---|
| 471 | users.append( self.model.search(foaf.givenName,nick).list() ) | 471 | users.append( self.model.search(foaf.givenName,nick).list() ) |
|---|
| 472 | return users | 472 | return users |
|---|
| 473 | | 473 | |
|---|
| 474 | def add_user(self,nick=None,mbox=None,password=None): | 474 | def add_user(self,nick=None,mbox=None,password=None): |
|---|
| 475 | """ Create a new user foaf:Person""" | 475 | """ Create a new user foaf:Person""" |
|---|
| 476 | store = self.store | 476 | store = self.store |
|---|
| 477 | from rdfobj import foaf, wlan | 477 | from rdfobj import foaf, wlan |
|---|
| 478 | | 478 | |
|---|
| 479 | obj = self.model.create(foaf.Person,uri=uri) | 479 | obj = self.model.create(foaf.Person,uri=uri) |
|---|
| 480 | obj[foaf.mbox] = mbox | 480 | obj[foaf.mbox] = mbox |
|---|
| 481 | obj[foaf.nick] = nick | 481 | obj[foaf.nick] = nick |
|---|
| 482 | if page is not None: obj[foaf.homepage] = page | 482 | if page is not None: obj[foaf.homepage] = page |
|---|
| 483 | self.obj = obj | 483 | self.obj = obj |
|---|
| 484 | | 484 | |
|---|
| 485 | """ Create a kind of shadow user where we store the password and the logged-in token, so they won't get serialised accidentally along with the user. """ | 485 | """ Create a kind of shadow user where we store the password and the logged-in token, so they won't get serialised accidentally along with the user. """ |
|---|
| 486 | | 486 | |
|---|
| 487 | auth = store.create(foaf.AuthedPerson) | 487 | auth = store.create(foaf.AuthedPerson) |
|---|
| 488 | auth[foaf.password] = password | 488 | auth[foaf.password] = password |
|---|
| 489 | auth[foaf.nick] = nick | 489 | auth[foaf.nick] = nick |
|---|
| 490 | auth[foaf.alias] = obj | 490 | auth[foaf.alias] = obj |
|---|
| 491 | | 491 | |
|---|
| 492 | token = self.auth_token() | 492 | token = self.auth_token() |
|---|
| 493 | auth[foaf.auth_token] = token | 493 | auth[foaf.auth_token] = token |
|---|
| 494 | self.model.sync() | 494 | self.model.sync() |
|---|
| 495 | return token | 495 | return token |
|---|
| 496 | | 496 | |
|---|
| 497 | def auth_token(self): | 497 | def auth_token(self): |
|---|
| 498 | """Generate a random auth token.""" | 498 | """Generate a random auth token.""" |
|---|
| 499 | x = '' | 499 | x = '' |
|---|
| 500 | for n in range(0, 6): | 500 | for n in range(0, 6): |
|---|
| 501 | x = x + chr(65 + random.randint(0, 26)) | 501 | x = x + chr(65 + random.randint(0, 26)) |
|---|
| 502 | return x | 502 | return x |
|---|
| 503 | | 503 | |
|---|
| 504 | def visit_scheduled(self,sub): | 504 | def visit_scheduled(self,sub): |
|---|
| 505 | """Compare the last visited time, if that's applicable, | 505 | """Compare the last visited time, if that's applicable, |
|---|
| 506 | to the interval between events | 506 | to the interval between events |
|---|
| 507 | (rather than a schedule? perhaps we'll have to re-think this later.""" | 507 | (rather than a schedule? perhaps we'll have to re-think this later.""" |
|---|
| 508 | if self._visit_true is not None: | 508 | if self._visit_true is not None: |
|---|
| 509 | return 1 | 509 | return 1 |
|---|
| 510 | from rdfobj import fbox | 510 | from rdfobj import fbox |
|---|
| 511 | last = sub[fbox.last_visited] | 511 | last = sub[fbox.last_visited] |
|---|
| 512 | if last is None: | 512 | if last is None: |
|---|
| 513 | return 1 | 513 | return 1 |
|---|
| 514 | t = time.time() | 514 | t = time.time() |
|---|
| 515 | # convert last time simply from ical to epoch? | 515 | # convert last time simply from ical to epoch? |
|---|
| 516 | | 516 | |
|---|
| 517 | return 1 | 517 | return 1 |
|---|
| 518 | since = t - float(str(last)) | 518 | since = t - float(str(last)) |
|---|
| 519 | | 519 | |
|---|
| 520 | interval = sub[fbox.interval] | 520 | interval = sub[fbox.interval] |
|---|
| 521 | if interval is None: | 521 | if interval is None: |
|---|
| 522 | sub[fbox.interval] = str(100) | 522 | sub[fbox.interval] = str(100) |
|---|
| 523 | interval = sub[fbox.interval] | 523 | interval = sub[fbox.interval] |
|---|
| 524 | secs = int(str(interval))*60 | 524 | secs = int(str(interval))*60 |
|---|
| 525 | if since >= secs: | 525 | if since >= secs: |
|---|
| 526 | return 1 | 526 | return 1 |
|---|
| 527 | return None | 527 | return None |
|---|
| 528 | | 528 | |
|---|
| 529 | def counter(self): | 529 | def counter(self): |
|---|
|
|---|