| 68 | self.model.load(bbox.config.boot) | 66 | self.model.load(bbox.config.boot) |
|---|
| 69 | | 67 | |
|---|
| 70 | from rdfobj import fbox | 68 | from rdfobj import fbox |
|---|
| 71 | counter = self.model.fetch(fbox.Visit_Count) | 69 | counter = self.model.fetch(fbox.Visit_Count) |
|---|
| 72 | c = counter[fbox.count] | 70 | c = counter[fbox.count] |
|---|
| 73 | if c is None: c = 0 | 71 | if c is None: c = 0 |
|---|
| 74 | c = int(str(c))+1 | 72 | c = int(str(c))+1 |
|---|
| 75 | counter[fbox.count] = str(c) | 73 | counter[fbox.count] = str(c) |
|---|
| 76 | | 74 | |
|---|
| 77 | if counter is None: | 75 | if counter is None: |
|---|
| 78 | v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) | 76 | v = self.model.create(fbox.Visit_Count, uri=fbox.Visit_Count) |
|---|
| 79 | v[fbox.count] = 0 | 77 | v[fbox.count] = 0 |
|---|
| 80 | | 78 | |
|---|
| 81 | if spatial is not None: | 79 | if spatial is not None: |
|---|
| 82 | self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial,model=self.model) | 80 | self.spatialStore = bbox.spatialStore.SpatialStore(database=spatial,model=self.model) |
|---|
| 83 | else: | 81 | else: |
|---|
| 84 | self.spatialStore = None | 82 | self.spatialStore = None |
|---|
| 85 | self.textindex = textindex | 83 | self.textindex = textindex |
|---|
| 86 | | 84 | |
|---|
| 87 | | 85 | |
|---|
def mention(self, thought):
    """Report what the box is doing, but only in verbose mode.

    Writes ``thought`` to standard output when ``self._verbose`` is
    truthy; otherwise the call is a no-op.
    """
    if not self._verbose:
        return
    print(thought)
|---|
| 93 | | 91 | |
|---|
def read_subscriptions(self):
    """Pick up the latest updates for every subscribed feed (fbox:Feed).

    Every subscription returned by ``self.subscriptions()`` is passed to
    ``self.visit_scheduled()``.  A ``None`` answer is treated as "not due"
    and the subscription is skipped; otherwise the channel is read with
    ``read_rss()`` or ``read_rdf()`` according to its ``fbox:format``.
    Subscriptions in any other format are left untouched.
    """
    from rdfobj import fbox

    self.mention("checking subscriptions.")
    for sub in self.subscriptions():
        channel = sub[fbox.channel]
        self.mention("reading " + str(channel))
        feed_format = sub[fbox.format].uri()

        # see if we're actually due a visit
        if self.visit_scheduled(sub) is None:
            # subscriptions() hands back a plain list, so there is no
            # .next() to call; the for loop advances on its own.
            self.mention("nothing due to look at!")
            continue

        if feed_format == fbox.rss:
            self.read_rss(channel.uri(), subscription=sub)
        elif feed_format == fbox.rdf:
            self.read_rdf(channel.uri(), subscription=sub)
|---|
| 115 | | 113 | |
|---|
def read_rss(self, uri, context=None, subscription=None, xml=None,
             properties=None):
    """Read updates from an RSS feed and store a local copy of its items.

    If a subscription object is passed in, it is handed to
    ``politely_get_uri()`` so that it can be updated with HTTP status
    information.  If spatial things are found and a spatial store is
    configured, they are added to the spatial index under the item's link.

    uri          -- address of the feed.
    context      -- context used for created items; replaced by a fresh
                    ``self.visit(uri)`` context once the feed is parsed.
    subscription -- optional subscription object for this feed.
    xml          -- accepted for compatibility; not used by this method.
    properties   -- optional dictionary mapping a key that may appear on
                    an individual feed entry to the rdf property it should
                    be stored under, e.g. 'media_content': rss.link

    Returns the list of item objects built from entries that carry a
    timestamp.  Entries without one are still created in the model but are
    not indexed or returned.  An empty list is returned when the response
    is not a 200 (unless ``self._visit_true`` forces the read) or when the
    response carries no data.
    """
    from rdfobj import rss, rdf, fbox, ical, geo

    result = self.politely_get_uri(uri, subscription=subscription)
    channel = self.model.fetch(uri)

    # Unless a read is being forced, only a 200 response has new content.
    if not self._visit_true and result['status'] != 200:
        return []

    # A forced read can still come back without a body (e.g. the 404
    # placeholder built by politely_get_uri); there is nothing to parse.
    data = result.get('data')
    if data is None:
        return []

    items = []
    feed = feedparser.parse(data)
    if 'feed' not in feed:
        return items

    # We got a feed object back: create a context for this visit to the
    # feed and store the entries collected from it.
    context = self.visit(uri)

    for e in feed.entries:
        link = str(e.link)
        title = None
        item = self.model.create(rss.item, uri=link, context=context)

        if 'summary' in e:
            item[rss.description] = str(e.summary)
        # 'content', when present, takes precedence over 'summary'.
        if 'content' in e:
            item[rss.description] = str(e.content)
        if 'title' in e:
            title = str(e.title)
            item[rss.title] = title

        item[fbox.channel] = channel

        time_tuple = None
        if 'modified_parsed' in e:
            time_tuple = e.modified_parsed
        elif 'created_parsed' in e:
            time_tuple = e.created_parsed

        # not much use without a timestamp
        if time_tuple is None:
            continue
        item[ical.datetime] = time.strftime("%Y%m%dT%H%M%SZ", time_tuple)

        rdf_type = None
        if 'rdf_type' in e:
            rdf_type = str(e['rdf_type'])
            item[rdf.type] = rdf_type

        # Geometry: a point, else a line, else a polygon.  The same key is
        # used for the membership test and the lookup, and the spatial
        # index is keyed by the item's own link so that items from one
        # feed do not overwrite each other.
        store = self.spatialStore
        if 'geo_lat' in e and 'geo_long' in e:
            lat = str(e['geo_lat'])
            lon = str(e['geo_long'])
            item[geo.lat] = lat
            item[geo.long] = lon
            if store is not None:
                store.add_or_update_point(link, type=rdf_type, name=title,
                                          x=lon, y=lat)
        elif 'geo_line' in e:
            line = str(e['geo_line'])
            item[geo.line] = line
            if store is not None:
                points = store.parse_points(line)
                store.add_or_update_line(link, points=points, type=rdf_type)
        elif 'geo_polygon' in e:
            poly = str(e['geo_polygon'])
            item[geo.polygon] = poly
            if store is not None:
                points = store.parse_points(poly)
                store.add_or_update_polygon(link, points=points,
                                            type=rdf_type)

        # add the optional rdf properties to new items
        if properties is not None:
            for key in properties:
                if key in e:
                    item[properties[key]] = e[key]

        items.append(item)
    return items
|---|
| 220 | | 218 | |
|---|
def read_rdf(self, uri, subscription=None, xml=None):
    """Read updates from an RDF URL.

    If a subscription object is passed in, it is handed to
    ``politely_get_uri()`` so that it can be updated with HTTP status
    information.  Statements not yet in the model are added under a fresh
    ``self.visit(uri)`` context.  If a spatial store is configured, points
    (geo:lat + geo:long), lines and polygons found in the document are
    indexed under their subject; if a text index is configured, every
    fetched object with an ``rdf_type`` is text-indexed.

    uri          -- address of the RDF document; it is parsed directly
                    from this address by the raptor parser.
    subscription -- optional subscription object for this document.
    xml          -- accepted for compatibility; not used by this method.

    Returns a list of the objects fetched for each subject seen in the
    document.  The list is empty when the response is not a 200 (unless
    ``self._visit_true`` forces the read) or when parsing cannot start.
    """
    from rdfobj import geo, dc, rdf

    result = self.politely_get_uri(uri, subscription=subscription)
    if not self._visit_true and result['status'] != 200:
        return []

    context = self.visit(uri)

    # we can't just use load() because we want the visit context, and to
    # search for spatial things and index them while we're parsing...
    # Each collection must be its own object: a chained assignment would
    # bind all the names to one shared dict / list.
    lats, longs, titles, types = {}, {}, {}, {}
    lines, shapes = [], []

    parser = RDF.Parser('raptor')
    try:
        stream = parser.parse_as_stream(RDF.Uri(uri))
    except Exception:
        # Best effort: a document that cannot be parsed yields no objects.
        return []

    spatial = self.spatialStore
    if spatial is not None:
        # Build the predicate nodes once rather than once per statement.
        lat_node = RDF.Node(uri_string=str(geo.lat))
        long_node = RDF.Node(uri_string=str(geo.long))
        line_node = RDF.Node(uri_string=str(geo.line))
        polygon_node = RDF.Node(uri_string=str(geo.polygon))
        title_node = RDF.Node(uri_string=str(dc.title))
        type_node = RDF.Node(uri_string=str(rdf.type))

    subjects = {}
    if stream:
        while not stream.end():
            s = stream.current()
            subjects[s.subject] = 1

            # Only add statements the model does not already hold.
            if not self.model.model.contains_statement(s):
                self.model.model.add_statement(s, context)

            if spatial is not None:
                subject = str(s.subject)
                value = str(s.object)
                predicate = s.predicate
                if predicate == lat_node:
                    lats[subject] = value
                elif predicate == long_node:
                    longs[subject] = value
                elif predicate == line_node:
                    lines.append((subject, value))
                elif predicate == polygon_node:
                    shapes.append((subject, value))
                elif predicate == title_node:
                    titles[subject] = value
                elif predicate == type_node:
                    types[subject] = value

            stream.next()

    objects = [self.model.fetch(subject) for subject in subjects]

    if spatial is not None:
        # Indexing happens after the whole stream has been read, so a
        # subject's type and title are known whatever order they came in.
        for subject, lat in lats.items():
            lon = longs.get(subject)
            if lon is None:
                # a latitude on its own is not a point
                continue
            spatial.add_or_update_point(subject, type=types.get(subject),
                                        name=titles.get(subject),
                                        x=lon, y=lat)
        for subject, text in lines:
            points = spatial.parse_points(text)
            spatial.add_or_update_line(subject, points=points,
                                       type=types.get(subject))
        for subject, text in shapes:
            points = spatial.parse_points(text)
            spatial.add_or_update_polygon(subject, points=points,
                                          type=types.get(subject))

    if self.textindex is not None:
        for o in objects:
            if o is None:
                # fetch() found nothing for this subject
                continue
            schema = o.rdf_type
            if schema is not None:
                self.textindex.text_index(schema, o)

    return objects
|---|
| 308 | | 306 | |
|---|
def politely_get_uri(self, uri, subscription=None):
    """Request a copy of the document at a url, politely.

    When a subscription object is supplied and a read is not being forced
    (``self._visit_true``), the request is made conditional on the
    subscription's recorded ``fbox:last_etag`` or, failing that, its
    ``fbox:last_modified``.  Without a subscription the document is simply
    requested, so the parser can be used without context management.

    If a subscription is passed in and the response carries a status, the
    subscription gets a new set of properties from the response:
    ``fbox:http_status``, ``fbox:last_etag``, ``fbox:last_modified`` and
    ``fbox:last_visited``.

    Returns the result of ``polite_request()`` with a ``'status'`` entry
    always present: 200 is filled in for a result that only carries data
    (a 'file:/' uri), and 404 when there is no result or it has neither a
    status nor data.
    """
    # we should deal with etag/last-mod politely here too @@TODO
    from rdfobj import fbox

    target = str(uri)
    tracked = subscription is not None

    if not tracked or self._visit_true:
        # Either there is no subscription to consult, or we always want
        # to read the content (for debugging reasons).
        result = polite_request(target)
    elif subscription[fbox.last_etag] is not None:
        result = polite_request(
            target, etag=str(subscription[fbox.last_etag]))
    elif subscription[fbox.last_modified] is not None:
        result = polite_request(
            target, last_modified=str(subscription[fbox.last_modified]))
    else:
        result = polite_request(target)

    if result is None:
        return {'status': 404}

    if 'status' in result:
        # this was a HTTP request
        self.mention("received response: " + str(result['status']))

        # Take actions about other kinds of HTTP statuses. (TODO)
        if tracked:
            subscription[fbox.http_status] = str(result['status'])
            subscription[fbox.last_etag] = result['etag']
            subscription[fbox.last_modified] = result['lastmodified']
            # NOTE(review): strftime() with no time tuple formats local
            # time, yet the stamp ends in "Z" -- confirm which is meant.
            subscription[fbox.last_visited] = time.strftime(
                "%Y%m%dT%H%M%SZ")
    elif result.get('data') is not None:
        # a 'file:/' uri will only have result['data'];
        # pretend we have a positive HTTP status
        result['status'] = 200
    else:
        # Nothing usable came back; callers index result['status'].
        result['status'] = 404

    return result
|---|
| 366 | | 364 | |
|---|
def subscriptions(self):
    """Return, as a list, everything in the model typed fbox:Feed.

    These are the feeds we are subscribed to.
    """
    from rdfobj import fbox, rdf

    # workaround, as we want a list, not an Iterator which is sometimes
    # empty
    found = self.model.search(rdf.type, fbox.Feed)
    return found.list()
|---|
| 374 | | 372 | |
|---|
def subscription(self, uri):
    """Look up the subscription that a uri represents.

    Returns whatever ``self.model.fetch(uri)`` yields for that uri.
    """
    return self.model.fetch(uri)
|---|
| 380 | | 378 | |
|---|
def items(self, uri, since=None, until=None):
    """Get items from a feed, optionally filtering by date.

    uri   -- uri of the subscription whose channel items are wanted.
    since -- if given, keep only items whose dc:date is greater than it.
    until -- if given, keep only items whose dc:date is not greater
             than it.

    With no bounds the channel's ``rss_items`` are returned as they are.
    With a bound, a new list of the matching items is returned; items
    without a dc:date cannot be placed in time and are left out.
    """
    from rdfobj import fbox, dc, rss

    channel = self.subscription(uri)[fbox.channel]
    if since is None and until is None:
        return channel.rss_items

    selected = []
    for entry in channel[rss.items]:
        stamp = entry[dc.date]
        if stamp is None:
            continue
        if since is not None and not stamp > since:
            continue
        if until is not None and stamp > until:
            continue
        selected.append(entry)
    return selected
|---|
| 394 | | 392 | |
|---|
def subscribe(self, feed=None, format=None, interval=None):
    """Create a subscription to a uri.

    feed     -- uri of the feed; nothing happens when it is None or when
                a subscription to it already exists.
    format   -- either 'rss' or 'rdf', to be handled by feedparser in the
                read_rss() method or by raptor in the read_rdf() method.
                RDF is assumed if no format is specified; any other value
                is stored as given.
    interval -- the maximum interval, in minutes, that a feed should be
                checked at.  Defaults to 100 minutes.

    Returns the new fbox:Feed object, or None when no subscription was
    created.
    """
    from rdfobj import fbox

    if feed is None:
        return

    existing = self.model.search(fbox.channel, feed).first()
    if existing is not None:
        return

    self.mention("subscribing to " + str(feed))

    if format is None:
        chosen = fbox.rdf
    elif format == 'rss':
        chosen = fbox.rss
    elif format == 'rdf':
        chosen = fbox.rdf
    else:
        chosen = format

    every = "100" if interval is None else interval

    sub = self.model.create(fbox.Feed, uri=None)
    sub[fbox.channel] = str(feed)
    sub[fbox.format] = str(chosen)
    sub[fbox.interval] = every
    return sub
|---|
| 431 | | 429 | |
|---|
def update(self):
    """Record a visit to the channel of every subscribed feed.

    Calls ``self.visit()`` with the fbox:channel of each subscription
    returned by ``self.subscriptions()``.
    """
    feeds = self.subscriptions()
    from rdfobj import fbox

    for feed in feeds:
        channel = feed[fbox.channel]
        self.visit(channel)
|---|
| 438 | | 436 | |
|---|
def visit(self, uri=None):
    """Record a visit paid to a feed and return a context node for it.

    A new fbox:Visit object is created under a uri made of ``fbox.visit``
    and the current value of ``self.counter()``.  It stores the visited
    uri as fbox:source and the time of the visit as fbox:timestamp.  The
    returned RDF node wraps that object's uri and is used as the Redland
    context for all the information collected during this visit.
    """
    # redland had problems serialising models with bnode context uris,
    # hence the counter-based uri
    visit_number = self.counter()
    from rdfobj import fbox

    record_uri = str(fbox.visit) + '/' + str(visit_number)
    record = self.model.create(fbox.Visit, record_uri)

    record[fbox.source] = uri
    record[fbox.timestamp] = time.strftime("%Y%m%dT%H%M%SZ")

    context_uri = RDF.Uri(str(record.uri()))
    return RDF.Node(context_uri)
|---|
| 454 | | 452 | |
|---|
def user(self, token=None, nick=None, mbox=None):
    """Find a user / foaf:Person by login token, mbox or name.

    The arguments are mutually exclusive and are tried in that order;
    only the first one that is not None is used.

    token -- returns the foaf:alias of the object holding this
             foaf:auth_token, or None if no object holds it.
    mbox  -- returns the first object with this foaf:mbox, or None.
    nick  -- returns one flat list of every object whose foaf:name or
             foaf:givenName equals nick.

    Returns None when no argument is given.
    No security - handle this yourself elsewhere!
    """
    from rdfobj import foaf

    if token is not None:
        holder = self.model.search(foaf.auth_token, token).first()
        if holder is None:
            return None
        return holder[foaf.alias]

    if mbox is not None:
        return self.model.search(foaf.mbox, mbox).first()

    if nick is not None:
        users = self.model.search(foaf.name, nick).list()
        # extend, not append: callers get one flat list of matches
        users.extend(self.model.search(foaf.givenName, nick).list())
        return users

    return None
|---|
| 473 | | 471 | |
|---|
def add_user(self, nick=None, mbox=None, password=None, uri=None, page=None):
    """Create a new user foaf:Person and return a fresh auth token.

    nick     -- stored as foaf:nick on both the person and the shadow
                auth object.
    mbox     -- stored as foaf:mbox on the person.
    password -- stored as foaf:password on the shadow auth object.
    uri      -- optional URI for the new foaf:Person; when None the
                person is created without an explicit URI.
    page     -- optional value for foaf:homepage; skipped when None.

    The created person is also kept on self.obj. Returns the token
    produced by self.auth_token().
    """
    # NOTE(review): the person is created on self.model but the shadow
    # object on self.store, while user() searches self.model for
    # foaf:auth_token - confirm these are meant to be different objects.
    store = self.store
    from rdfobj import foaf

    # uri and page used to be read without ever being defined, which
    # raised NameError; they are now optional keyword arguments.
    if uri is None:
        # presumably create() accepts a missing uri, as in the
        # store.create() call below - TODO confirm for self.model
        person = self.model.create(foaf.Person)
    else:
        person = self.model.create(foaf.Person, uri=uri)
    person[foaf.mbox] = mbox
    person[foaf.nick] = nick
    if page is not None:
        person[foaf.homepage] = page
    self.obj = person

    # Create a kind of shadow user where we store the password and the
    # logged-in token, so they won't get serialised accidentally along
    # with the user.
    # NOTE(review): the password is stored exactly as passed in;
    # consider hashing it before it reaches this method.
    auth = store.create(foaf.AuthedPerson)
    auth[foaf.password] = password
    auth[foaf.nick] = nick
    auth[foaf.alias] = person

    token = self.auth_token()
    auth[foaf.auth_token] = token
    self.model.sync()
    return token
|---|
| 496 | | 494 | |
|---|
def auth_token(self):
    """Generate a random auth token of six uppercase ASCII letters (A-Z)."""
    # random.randint() includes both end points, so the former
    # randint(0, 26) could produce chr(91) == '['. randrange(26) keeps
    # every character inside A-Z.
    # SystemRandom draws from the operating system's entropy source,
    # which suits a login token better than the default generator.
    rng = random.SystemRandom()
    return ''.join(chr(65 + rng.randrange(26)) for n in range(6))
|---|
| 503 | | 501 | |
|---|
def visit_scheduled(self, sub):
    """Decide whether the subscription `sub` is due for a visit.

    Returns 1 when a visit should happen. At present that is every
    time: interval-based scheduling is not active yet, so the result
    is 1 whether or not the subscription has been visited before.

    sub -- subscription object, read via sub[fbox.last_visited].
    """
    # A non-None _visit_true forces a visit without consulting the
    # subscription at all.
    if self._visit_true is not None:
        return 1

    from rdfobj import fbox
    last = sub[fbox.last_visited]
    if last is None:
        # No recorded last visit.
        return 1

    # TODO: convert `last` from its stored form to epoch seconds (the
    # original note asks "from ical to epoch?"), then compare the time
    # elapsed since then with sub[fbox.interval] minutes (defaulting the
    # interval to "100" when unset) and return None when the interval
    # has not elapsed. The draft of that logic sat behind an
    # unconditional `return 1` and never ran, so it was removed.
    return 1
|---|
| 528 | | 526 | |
|---|
| 529 | def counter(self): | 527 | def counter(self): |
|---|
| 530 | """Update the counter that's used to generate visit context URIs.""" | 528 | """Update the counter that's used to gener |
|---|