Changeset 188
- Timestamp:
- 08/16/08 23:06:46 (4 months ago)
- Files:
-
- trunk/shakespeare/controllers/site.py (modified) (1 diff)
- trunk/shakespeare/controllers/text.py (modified) (1 diff)
- trunk/shakespeare/format.py (modified) (1 diff)
- trunk/shakespeare/format_test.py (modified) (1 diff)
- trunk/shakespeare/model/dm.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/shakespeare/controllers/site.py
Revision 157 Revision 188 1 import logging 1 import logging 2 2 3 import genshi 3 import genshi 4 4 5 from shakespeare.lib.base import * 5 from shakespeare.lib.base import * 6 6 7 import shakespeare 7 import shakespeare 8 import shakespeare.index 8 import shakespeare.index 9 import shakespeare.format 9 import shakespeare.format 10 import shakespeare.concordance 10 import shakespeare.concordance 11 import shakespeare.model as model 11 import shakespeare.model as model 12 12 13 # import this after dm so that db connection is set 13 # import this after dm so that db connection is set 14 import annotater.store14 # import annotater.store 15 import annotater.marginalia15 # import annotater.marginalia 16 16 17 log = logging.getLogger(__name__) 17 log = logging.getLogger(__name__) 18 18 19 19 20 class SiteController(BaseController): 20 class SiteController(BaseController): 21 21 22 def index(self): 22 def index(self): 23 c.works_index = shakespeare.index.all 23 c.works_index = shakespeare.index.all 24 return render('index') 24 return render('index') 25 25 26 def guide(self): 26 def guide(self): 27 return render('guide') 27 return render('guide') 28 28 29 def concordance(self, word=None): 29 def concordance(self, word=None): 30 # TODO: support concordance/word 30 # TODO: support concordance/word 31 return self.concordance_index() 31 return self.concordance_index() 32 32 33 def concordance_index(self): 33 def concordance_index(self): 34 stats = shakespeare.concordance.Statistics() 34 stats = shakespeare.concordance.Statistics() 35 c.words = stats.keys() 35 c.words = stats.keys() 36 return render('concordance') 36 return render('concordance') 37 37 38 def concordance_word(self, word=None): 38 def concordance_word(self, word=None): 39 # TODO: sort by work etc 39 # TODO: sort by work etc 40 import shakespeare.textutils 40 import shakespeare.textutils 41 refs = [] 41 refs = [] 42 cc = shakespeare.concordance.Concordance() 42 cc = shakespeare.concordance.Concordance() 43 if word 
is not None: 43 if word is not None: 44 refs = list(cc.get(word)) 44 refs = list(cc.get(word)) 45 newrefs = [] 45 newrefs = [] 46 for ref in refs: 46 for ref in refs: 47 # we use the 'plain' format when building the concordance 47 # we use the 'plain' format when building the concordance 48 ff = ref.text.get_text() 48 ff = ref.text.get_text() 49 snippet = shakespeare.textutils.get_snippet(ff, ref.char_index) 49 snippet = shakespeare.textutils.get_snippet(ff, ref.char_index) 50 ref.snippet = snippet 50 ref.snippet = snippet 51 c.word = word 51 c.word = word 52 c.refs = refs 52 c.refs = refs 53 return render('concordance_by_word') 53 return render('concordance_by_word') 54 54 55 # 2008-04-26 (rgrp): none of these annotater related items 55 # 2008-04-26 (rgrp): none of these annotater related items 56 # seem to working properly 56 # seem to working properly 57 # think it is related to annotater so leaving this alone for time being 57 # think it is related to annotater so leaving this alone for time being 58 58 59 def marginalia(self): 59 def marginalia(self): 60 prefix = '/' + h.url_for('marginalia') 60 prefix = '/' + h.url_for('marginalia') 61 media_app = annotater.marginalia.MarginaliaMedia(prefix) 61 media_app = annotater.marginalia.MarginaliaMedia(prefix) 62 out = media_app(request.environ, self.start_response) 62 out = media_app(request.environ, self.start_response) 63 return out 63 return out 64 64 65 def annotation(self): 65 def annotation(self): 66 store = annotater.store.AnnotaterStore() 66 store = annotater.store.AnnotaterStore() 67 return store(request.environ, self.start_response) 67 return store(request.environ, self.start_response) 68 68 69 def view_annotate(self): 69 def view_annotate(self): 70 # only one name here ... 70 # only one name here ... 
71 name = request.params.get('name') 71 name = request.params.get('name') 72 textobj = model.Material.byName(name) 72 textobj = model.Material.byName(name) 73 tfileobj = textobj.get_text() 73 tfileobj = textobj.get_text() 74 formatter = shakespeare.format.TextFormatterAnnotate() 74 formatter = shakespeare.format.TextFormatterAnnotate() 75 # not perfect in that we might have the application mounted somewhere 75 # not perfect in that we might have the application mounted somewhere 76 annotation_store_fqdn = wsgiref.util.application_uri(request.environ) 76 annotation_store_fqdn = wsgiref.util.application_uri(request.environ) 77 page_url = wsgiref.util.request_uri(request.environ) 77 page_url = wsgiref.util.request_uri(request.environ) 78 ttext = formatter.format(tfileobj, page_uri=page_url) 78 ttext = formatter.format(tfileobj, page_uri=page_url) 79 thtml = genshi.HTML(ttext) 79 thtml = genshi.HTML(ttext) 80 80 81 prefix = cfg.get('annotater', 'marginalia_prefix') 81 prefix = cfg.get('annotater', 'marginalia_prefix') 82 marginalia_media = annotater.marginalia.get_media_header(prefix, 82 marginalia_media = annotater.marginalia.get_media_header(prefix, 83 annotation_store_fqdn, 83 annotation_store_fqdn, 84 page_url) 84 page_url) 85 buttons = annotater.marginalia.get_buttons(page_url) 85 buttons = annotater.marginalia.get_buttons(page_url) 86 marginalia_media = genshi.HTML(marginalia_media) 86 marginalia_media = genshi.HTML(marginalia_media) 87 buttons = genshi.HTML(buttons) 87 buttons = genshi.HTML(buttons) 88 88 89 c.text_with_annotation=thtml 89 c.text_with_annotation=thtml 90 c.marginalia_media=marginalia_media 90 c.marginalia_media=marginalia_media 91 c.annotation_buttons=buttons 91 c.annotation_buttons=buttons 92 return render('view_annotate', strip_whitespace=False) 92 return render('view_annotate', strip_whitespace=False) 93 93 trunk/shakespeare/controllers/text.py
Revision 157 Revision 188 1 import logging 1 import logging 2 2 3 import genshi 3 import genshi 4 4 5 from shakespeare.lib.base import * 5 from shakespeare.lib.base import * 6 6 7 import shakespeare 7 import shakespeare 8 import shakespeare.index 8 import shakespeare.index 9 import shakespeare.format 9 import shakespeare.format 10 import shakespeare.concordance 10 import shakespeare.concordance 11 import shakespeare.model as model 11 import shakespeare.model as model 12 12 13 # import this after dm so that db connection is set 13 # import this after dm so that db connection is set 14 import annotater.store14 # import annotater.store 15 import annotater.marginalia15 # import annotater.marginalia 16 16 17 log = logging.getLogger(__name__) 17 log = logging.getLogger(__name__) 18 18 19 19 20 class TextController(BaseController): 20 class TextController(BaseController): 21 21 22 def index(self): 22 def index(self): 23 c.works_index = shakespeare.index.all 23 c.works_index = shakespeare.index.all 24 return render('text/index') 24 return render('text/index') 25 25 26 def view(self): 26 def view(self): 27 name = request.params.get('name', '') 27 name = request.params.get('name', '') 28 format = request.params.get('format', 'plain') 28 format = request.params.get('format', 'plain') 29 if format == 'annotate': 29 if format == 'annotate': 30 return self.view_annotate(name) 30 return self.view_annotate(name) 31 namelist = name.split() 31 namelist = name.split() 32 numtexts = len(namelist) 32 numtexts = len(namelist) 33 textlist = [model.Material.byName(tname) for tname in namelist] 33 textlist = [model.Material.byName(tname) for tname in namelist] 34 # special case (only return the first text) 34 # special case (only return the first text) 35 if format == 'raw': 35 if format == 'raw': 36 result = textlist[0].get_text().read() 36 result = textlist[0].get_text().read() 37 status = '200 OK' 37 status = '200 OK' 38 response.headers['Content-Type'] = 'text/plain' 38 
response.headers['Content-Type'] = 'text/plain' 39 return result 39 return result 40 texts = [] 40 texts = [] 41 for item in textlist: 41 for item in textlist: 42 tfileobj = item.get_text() 42 tfileobj = item.get_text() 43 ttext = shakespeare.format.format_text(tfileobj, format) 43 ttext = shakespeare.format.format_text(tfileobj, format) 44 thtml = genshi.HTML(ttext) 44 thtml = genshi.HTML(ttext) 45 texts.append(thtml) 45 texts.append(thtml) 46 # would have assumed this would be 100.0/numtexts but for some reason 46 # would have assumed this would be 100.0/numtexts but for some reason 47 # you need to allow more room (maybe because of the scrollbars?) 47 # you need to allow more room (maybe because of the scrollbars?) 48 # result is not consistent across browsers ... 48 # result is not consistent across browsers ... 49 c.frame_width = 100.0/numtexts - 4.0 49 c.frame_width = 100.0/numtexts - 4.0 50 c.texts = texts 50 c.texts = texts 51 # set to not strip whitespace as o/w whitespace in pre tag gets removed 51 # set to not strip whitespace as o/w whitespace in pre tag gets removed 52 return render('text/view', strip_whitespace=False) 52 return render('text/view', strip_whitespace=False) 53 53 trunk/shakespeare/format.py
Revision 119 Revision 188 1 """ 1 """ 2 Format texts in a variety of ways 2 Format texts in a variety of ways 3 """ 3 """ 4 4 5 def format_text(fileobj, format): 5 def format_text(fileobj, format): 6 """Format a provided text in a variety of ways. 6 """Format a provided text in a variety of ways. 7 7 8 @format: the name specifying the format to use 8 @format: the name specifying the format to use 9 """ 9 """ 10 formatter = None 10 formatter = None 11 if format == 'plain': 11 if format == 'plain': 12 formatter = TextFormatterPlain() 12 formatter = TextFormatterPlain() 13 elif format == 'lineno': 13 elif format == 'lineno': 14 formatter = TextFormatterLineno() 14 formatter = TextFormatterLineno() 15 elif format == 'annotate': 15 elif format == 'annotate': 16 formatter = TextFormatterAnnotate() 16 formatter = TextFormatterAnnotate() 17 else: 17 else: 18 raise ValueError('Unknown format: %s' % format) 18 raise ValueError('Unknown format: %s' % format) 19 return formatter.format(fileobj) 19 return formatter.format(fileobj) 20 20 21 21 22 class TextFormatter(object): 22 class TextFormatter(object): 23 """Abstract base class for formatters. 23 """Abstract base class for formatters. 24 """ 24 """ 25 25 26 def format(self, file): 26 def format(self, file): 27 """Format the supplied text. 27 """Format the supplied text. 
28 28 29 @file: file-like object containing a text in plain txt with utf-8 29 @file: file-like object containing a text in plain txt with utf-8 30 encoding 30 encoding 31 31 32 @return a string in unicode format with utf-8 encoding 32 @return a string in unicode format with utf-8 encoding 33 """ 33 """ 34 raise NotImplementedError() 34 raise NotImplementedError() 35 35 36 def escape_chars(self, text): 36 def escape_chars(self, text): 37 return text.replace('&', '&amp;').replace('<', '&lt;') 37 return text.replace('&', '&amp;').replace('<', '&lt;') 38 38 39 class TextFormatterPlain(TextFormatter): 39 class TextFormatterPlain(TextFormatter): 40 """Format the text as plain text (in an html <pre> tag). 40 """Format the text as plain text (in an html <pre> tag). 41 """ 41 """ 42 42 43 def format(self, file): 43 def format(self, file): 44 self.file = file 44 self.file = file 45 out = unicode(self.file.read(), 'utf-8') 45 out = unicode(self.file.read(), 'utf-8') 46 out = self.escape_chars(out) 46 out = self.escape_chars(out) 47 out = \ 47 out = \ 48 u''' 48 u''' 49 <pre> 49 <pre> 50 %s 50 %s 51 </pre>''' % out 51 </pre>''' % out 52 return out 52 return out 53 53 54 class TextFormatterLineno(TextFormatter): 54 class TextFormatterLineno(TextFormatter): 55 """Format the text to have line numbers. 55 """Format the text to have line numbers. 
56 """ 56 """ 57 57 58 def format(self, file): 58 def format(self, file): 59 self.file = file 59 self.file = file 60 result = '' 60 result = '' 61 count = 0 61 count = 0 62 for line in self.file.readlines(): 62 for line in self.file.readlines(): 63 tlineno = unicode(count).ljust(4) # assume line no < 10000 63 tlineno = unicode(count).ljust(4) # assume line no < 10000 64 tline = unicode(line, 'utf-8').rstrip() 64 tline = unicode(line, 'utf-8').rstrip() 65 tline = self.escape_chars(tline) 65 tline = self.escape_chars(tline) 66 result += u'<pre id="%s">%s %s</pre>\n' % (count, tlineno, tline) 66 result += u'<pre id="%s">%s %s</pre>\n' % (count, tlineno, tline) 67 count += 1 67 count += 1 68 return result 68 return result 69 69 70 70 71 import annotater.marginalia72 class TextFormatterAnnotate(TextFormatter):73 """Format the text in a manner suitable for marginalia annotation.74 """75 76 def format(self, file, **kwargs):77 self.file = file78 # todo chunking79 line_numberer = TextFormatterLineno()80 text_with_linenos = line_numberer.format(self.file)81 values = {82 'content' : text_with_linenos,83 'id' : 'm0',84 }85 for key in kwargs:86 values[key] = kwargs[key]87 result = annotater.marginalia.format_entry(**values)88 return result89 trunk/shakespeare/format_test.py
Revision 119 Revision 188 1 import StringIO 1 import StringIO 2 import shakespeare.format 2 import shakespeare.format 3 3 4 4 5 starttext = unicode('''Blah \xc3\xa6 5 starttext = unicode('''Blah \xc3\xa6 6 blah & blah''', 'utf-8') 6 blah & blah''', 'utf-8') 7 7 8 sometext = starttext.replace('&', '&amp;') 8 sometext = starttext.replace('&', '&amp;') 9 9 10 class TestTextFormatter: 10 class TestTextFormatter: 11 formatter = shakespeare.format.TextFormatter() 11 formatter = shakespeare.format.TextFormatter() 12 12 13 def test_escape_chars(self): 13 def test_escape_chars(self): 14 out = self.formatter.escape_chars(starttext) 14 out = self.formatter.escape_chars(starttext) 15 assert out == sometext 15 assert out == sometext 16 16 17 17 18 class TestTextFormatterPlain: 18 class TestTextFormatterPlain: 19 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 19 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 20 formatter = shakespeare.format.TextFormatterPlain() 20 formatter = shakespeare.format.TextFormatterPlain() 21 exp = u''' 21 exp = u''' 22 <pre> 22 <pre> 23 %s 23 %s 24 </pre>''' % sometext 24 </pre>''' % sometext 25 25 26 def test_format(self): 26 def test_format(self): 27 out = self.formatter.format(self.fileobj) 27 out = self.formatter.format(self.fileobj) 28 assert out == self.exp 28 assert out == self.exp 29 29 30 30 31 class TestTextFormatterLineno: 31 class TestTextFormatterLineno: 32 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 32 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 33 formatter = shakespeare.format.TextFormatterLineno() 33 formatter = shakespeare.format.TextFormatterLineno() 34 exp = u'''<pre id="0">0 Blah \xe6</pre> 34 exp = u'''<pre id="0">0 Blah \xe6</pre> 35 <pre id="1">1 blah &amp; blah</pre> 35 <pre id="1">1 blah &amp; blah</pre> 36 ''' 36 ''' 37 37 38 def test_format(self): 38 def test_format(self): 39 out = self.formatter.format(self.fileobj) 39 out = self.formatter.format(self.fileobj) 40 assert out == self.exp 40 assert out 
== self.exp 41 41 42 42 43 class TestTextFormatterAnnotate:44 45 fileobj = StringIO.StringIO(starttext.encode('utf-8'))46 formatter = shakespeare.format.TextFormatterAnnotate()47 48 def test_format(self):49 self.fileobj.seek(0)50 page_url = 'http://somethingelse.com/'51 newtitle = 'New Title'52 out = self.formatter.format(53 self.fileobj,54 page_uri=page_url,55 title=newtitle,56 )57 print '"%s"' % out.encode('utf-8')58 assert page_url in out59 assert newtitle in out60 assert TestTextFormatterLineno.exp in out61 # test valid xml62 import genshi63 outxml = genshi.XML(out)64 65 66 def test_text_format(): 43 def test_text_format(): 67 formatlist = [ ('plain', TestTextFormatterPlain), 44 formatlist = [ ('plain', TestTextFormatterPlain), 68 ('lineno', TestTextFormatterLineno), 45 ('lineno', TestTextFormatterLineno), 69 ] 46 ] 70 for item in formatlist: 47 for item in formatlist: 71 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 48 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 72 tout = shakespeare.format.format_text(fileobj, item[0]) 49 tout = shakespeare.format.format_text(fileobj, item[0]) 73 assert tout == item[1].exp 50 assert tout == item[1].exp 74 51 trunk/shakespeare/model/dm.py
Revision 187 Revision 188 1 """ 1 """ 2 Domain model 2 Domain model 3 3 4 Material contains all data we have including shakespeare texts. A text is taken 4 Material contains all data we have including shakespeare texts. A text is taken 5 to be a specific version of a work. e.g. the 1623 folio of King Richard III. 5 to be a specific version of a work. e.g. the 1623 folio of King Richard III. 6 6 7 We may in future add a Work object to refer to 'abstract' work of which a given 7 We may in future add a Work object to refer to 'abstract' work of which a given 8 text is a version. 8 text is a version. 9 """ 9 """ 10 from pylons import config 10 from pylons import config 11 from sqlalchemy import Column, MetaData, Table, types, ForeignKey 11 from sqlalchemy import Column, MetaData, Table, types, ForeignKey 12 from sqlalchemy import orm 12 from sqlalchemy import orm 13 from sqlalchemy.orm import relation, backref 13 from sqlalchemy.orm import relation, backref 14 14 15 # make sure config is registered 15 # make sure config is registered 16 import shakespeare 16 import shakespeare 17 shakespeare.conf() 17 shakespeare.conf() 18 18 19 metadata = MetaData() 19 metadata = MetaData() 20 Session = orm.scoped_session(orm.sessionmaker( 20 Session = orm.scoped_session(orm.sessionmaker( 21 autoflush=True, 21 autoflush=True, 22 transactional=False, 22 transactional=False, 23 bind=config['pylons.g'].sa_engine 23 bind=config['pylons.g'].sa_engine 24 )) 24 )) 25 25 26 import shakespeare 26 import shakespeare 27 import shakespeare.cache 27 import shakespeare.cache 28 28 29 # import other sqlobject items30 from annotater.model import Annotation31 import annotater.model32 29 33 material_table = Table('material', metadata, 30 material_table = Table('material', metadata, 34 Column('id', types.Integer, primary_key=True), 31 Column('id', types.Integer, primary_key=True), 35 Column('name', types.String(255)), 32 Column('name', types.String(255)), 36 Column('title', types.String(255)), 33 
Column('title', types.String(255)), 37 Column('creator', types.String(255)), 34 Column('creator', types.String(255)), 38 Column('url', types.String(255)), 35 Column('url', types.String(255)), 39 Column('notes', types.Text()) 36 Column('notes', types.Text()) 40 ) 37 ) 41 38 42 # TODO: indices on word and occurences 39 # TODO: indices on word and occurences 43 statistic_table = Table('statistic', metadata, 40 statistic_table = Table('statistic', metadata, 44 Column('id', types.Integer, primary_key=True), 41 Column('id', types.Integer, primary_key=True), 45 Column('material_id', types.Integer, ForeignKey('material.id')), 42 Column('material_id', types.Integer, ForeignKey('material.id')), 46 Column('word', types.String(50)), 43 Column('word', types.String(50)), 47 Column('freq', types.Integer), 44 Column('freq', types.Integer), 48 ) 45 ) 49 46 50 47 51 from ConfigParser import SafeConfigParser 48 from ConfigParser import SafeConfigParser 52 49 53 50 54 51 55 class Material(object): 52 class Material(object): 56 """Material related to Shakespeare (usually text of works and ancillary 53 """Material related to Shakespeare (usually text of works and ancillary 57 matter such as introductions). 54 matter such as introductions). 58 55 59 NB: can not use 'text' as class name as it is an sql reserved word 56 NB: can not use 'text' as class name as it is an sql reserved word 60 57 61 @attribute name: a unique name identifying the material 58 @attribute name: a unique name identifying the material 62 59 63 TODO: mutiple creators ?? 60 TODO: mutiple creators ?? 
64 """ 61 """ 65 62 66 # TODO: remove (just here for sqlobject bkwards compat) 63 # TODO: remove (just here for sqlobject bkwards compat) 67 @classmethod 64 @classmethod 68 def byName(self, name): 65 def byName(self, name): 69 return self.query.filter_by(name=name).first() 66 return self.query.filter_by(name=name).first() 70 67 71 def get_text(self, format=None): 68 def get_text(self, format=None): 72 '''Get text (if any) associated with this material. 69 '''Get text (if any) associated with this material. 73 70 74 # ignore format for time being 71 # ignore format for time being 75 ''' 72 ''' 76 import pkg_resources 73 import pkg_resources 77 pkg = 'shksprdata' 74 pkg = 'shksprdata' 78 # default to plain txt format (TODO: generalise this) 75 # default to plain txt format (TODO: generalise this) 79 path = 'texts/%s.txt' % self.name 76 path = 'texts/%s.txt' % self.name 80 fileobj = pkg_resources.resource_stream(pkg, path) 77 fileobj = pkg_resources.resource_stream(pkg, path) 81 return fileobj 78 return fileobj 82 79 83 def get_cache_path(self, format): 80 def get_cache_path(self, format): 84 """Get path within cache to data file associated with this material. 81 """Get path within cache to data file associated with this material. 
85 @format: the version ('plain', original='' etc) 82 @format: the version ('plain', original='' etc) 86 """ 83 """ 87 return shakespeare.cache.default.path(self.url, format) 84 return shakespeare.cache.default.path(self.url, format) 88 85 89 @classmethod 86 @classmethod 90 def load_from_metadata(self, fileobj): 87 def load_from_metadata(self, fileobj): 91 cfgp = SafeConfigParser() 88 cfgp = SafeConfigParser() 92 cfgp.readfp(fileobj) 89 cfgp.readfp(fileobj) 93 for section in cfgp.sections(): 90 for section in cfgp.sections(): 94 item = Material.byName(section) 91 item = Material.byName(section) 95 if item is None: 92 if item is None: 96 item = Material(name=section) 93 item = Material(name=section) 97 assert item is not None 94 assert item is not None 98 for key, val in cfgp.items(section): 95 for key, val in cfgp.items(section): 99 setattr(item, key, val) 96 setattr(item, key, val) 100 Session.flush() 97 Session.flush() 101 98 102 class Statistic(object): 99 class Statistic(object): 103 pass 100 pass 104 101 105 # Map each domain model class to its corresponding relational table. 102 # Map each domain model class to its corresponding relational table. 106 mapper = Session.mapper 103 mapper = Session.mapper 107 mapper(Material, material_table) 104 mapper(Material, material_table) 108 mapper(Statistic, statistic_table, properties={ 105 mapper(Statistic, statistic_table, properties={ 109 'text':relation(Material, backref='statistics') 106 'text':relation(Material, backref='statistics') 110 }) 107 }) 111 108
