Changeset 119
- Timestamp:
- 04/09/07 15:00:07 (1 year ago)
- Files:
-
- trunk/src/shakespeare/format.py (modified) (1 diff)
- trunk/src/shakespeare/format_test.py (modified) (1 diff)
- trunk/src/shakespeare/template/view_annotate.html (modified) (1 diff)
- trunk/src/shakespeare/wsgiplain.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/src/shakespeare/format.py
Revision 113 Revision 119 1 """ 1 """ 2 Format texts in a variety of ways 2 Format texts in a variety of ways 3 """ 3 """ 4 4 5 def format_text(fileobj, format): 5 def format_text(fileobj, format): 6 """Format a provided text in a variety of ways. 6 """Format a provided text in a variety of ways. 7 7 8 @format: the name specifying the format to use 8 @format: the name specifying the format to use 9 """ 9 """ 10 formatter = None 10 formatter = None 11 if format == 'plain': 11 if format == 'plain': 12 formatter = TextFormatterPlain( fileobj)12 formatter = TextFormatterPlain() 13 elif format == 'lineno': 13 elif format == 'lineno': 14 formatter = TextFormatterLineno( fileobj)14 formatter = TextFormatterLineno() 15 elif format == 'annotate': 15 elif format == 'annotate': 16 formatter = TextFormatterAnnotate( fileobj)16 formatter = TextFormatterAnnotate() 17 else: 17 else: 18 raise ValueError('Unknown format: %s' % format) 18 raise ValueError('Unknown format: %s' % format) 19 return formatter.format( )19 return formatter.format(fileobj) 20 20 21 21 22 class TextFormatter(object): 22 class TextFormatter(object): 23 """Abstract base class for formatters. 23 """Abstract base class for formatters. 24 """ 24 """ 25 25 26 def __init__(self, file=None): 26 def format(self, file): 27 """ 27 """Format the supplied text. 28 28 @file: file-like object containing a text in plain txt with utf-8 29 @file: file-like object containing a text in plain txt with utf-8 29 encoding 30 encoding 30 """31 self.file = file32 31 33 def format(self): 32 @return a string in unicode format with utf-8 encoding 34 """Format the supplied text. 
35 36 The returned string will be in unicode format with utf-8 encoding 37 """ 33 """ 38 raise NotImplementedError() 34 raise NotImplementedError() 39 35 40 def escape_chars(self, text): 36 def escape_chars(self, text): 41 return text.replace('&', '&amp;').replace('<', '&lt;') 37 return text.replace('&', '&amp;').replace('<', '&lt;') 42 38 43 class TextFormatterPlain(TextFormatter): 39 class TextFormatterPlain(TextFormatter): 44 """Format the text as plain text (in an html <pre> tag). 40 """Format the text as plain text (in an html <pre> tag). 45 """ 41 """ 46 42 47 def format(self): 43 def format(self, file): 44 self.file = file 48 out = unicode(self.file.read(), 'utf-8') 45 out = unicode(self.file.read(), 'utf-8') 49 out = self.escape_chars(out) 46 out = self.escape_chars(out) 50 out = \ 47 out = \ 51 u''' 48 u''' 52 <pre> 49 <pre> 53 %s 50 %s 54 </pre>''' % out 51 </pre>''' % out 55 return out 52 return out 56 53 57 class TextFormatterLineno(TextFormatter): 54 class TextFormatterLineno(TextFormatter): 58 """Format the text to have line numbers. 55 """Format the text to have line numbers. 
59 """ 56 """ 60 57 61 def format(self): 58 def format(self, file): 59 self.file = file 62 result = '' 60 result = '' 63 count = 0 61 count = 0 64 for line in self.file.readlines(): 62 for line in self.file.readlines(): 65 tlineno = unicode(count).ljust(4) # assume line no < 10000 63 tlineno = unicode(count).ljust(4) # assume line no < 10000 66 tline = unicode(line, 'utf-8').rstrip() 64 tline = unicode(line, 'utf-8').rstrip() 67 tline = self.escape_chars(tline) 65 tline = self.escape_chars(tline) 68 result += u'<pre id="%s">%s %s</pre>\n' % (count, tlineno, tline) 66 result += u'<pre id="%s">%s %s</pre>\n' % (count, tlineno, tline) 69 count += 1 67 count += 1 70 return result 68 return result 71 69 72 70 71 import annotater.marginalia 73 class TextFormatterAnnotate(TextFormatter): 72 class TextFormatterAnnotate(TextFormatter): 74 """Format the text in a manner suitable for marginalia annotation. 73 """Format the text in a manner suitable for marginalia annotation. 75 """ 74 """ 76 entry_template = u'''77 <div id="%(id)s" class="hentry">78 <h3 class="entry-title">%(title)s</h3>79 <div class="entry-content">80 %(content)s81 </div><!-- /entry-content -->82 <p class="metadata">83 <a rel="bookmark" href="%(page_url)s#%(id)s">#</a>84 <span class="author">%(author)s</span>85 </p>86 <div class="notes">87 <button class="createAnnotation" onclick="createAnnotation('%(id)s',true)" title="Click here to create an annotation">></button>88 <ol>89 <li></li>90 </ol>91 </div><!-- /notes -->92 </div><!-- /hentry -->93 '''94 75 95 def format(self): 76 def format(self, file, **kwargs): 96 line_numberer = TextFormatterLineno(self.file) 77 self.file = file 97 text_with_linenos = line_numberer.format() 98 # todo chunking 78 # todo chunking 79 line_numberer = TextFormatterLineno() 80 text_with_linenos = line_numberer.format(self.file) 99 values = { 81 values = { 100 'content' : text_with_linenos, 82 'content' : text_with_linenos, 101 'title' : 'Test Stuff', 83 'id' : 'm0', 102 'id' : 'm2', 
103 'page_url' : 'http://localhost:8080/', 104 'author' : 'Nemo', 105 } 84 } 106 result = self.entry_template % values 85 for key in kwargs: 86 values[key] = kwargs[key] 87 result = annotater.marginalia.format_entry(**values) 107 return result 88 return result 108 89 trunk/src/shakespeare/format_test.py
Revision 113 Revision 119 1 import StringIO 1 import StringIO 2 import shakespeare.format 2 import shakespeare.format 3 3 4 4 5 starttext = unicode('''Blah \xc3\xa6 5 starttext = unicode('''Blah \xc3\xa6 6 blah & blah''', 'utf-8') 6 blah & blah''', 'utf-8') 7 7 8 sometext = starttext.replace('&', '&amp;') 8 sometext = starttext.replace('&', '&amp;') 9 9 10 class TestTextFormatter: 10 class TestTextFormatter: 11 formatter = shakespeare.format.TextFormatter() 11 formatter = shakespeare.format.TextFormatter() 12 12 13 def test_escape_chars(self): 13 def test_escape_chars(self): 14 out = self.formatter.escape_chars(starttext) 14 out = self.formatter.escape_chars(starttext) 15 assert out == sometext 15 assert out == sometext 16 16 17 17 18 class TestTextFormatterPlain: 18 class TestTextFormatterPlain: 19 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 19 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 20 formatter = shakespeare.format.TextFormatterPlain( fileobj)20 formatter = shakespeare.format.TextFormatterPlain() 21 exp = u''' 21 exp = u''' 22 <pre> 22 <pre> 23 %s 23 %s 24 </pre>''' % sometext 24 </pre>''' % sometext 25 25 26 def test_format(self): 26 def test_format(self): 27 out = self.formatter.format( )27 out = self.formatter.format(self.fileobj) 28 assert out == self.exp 28 assert out == self.exp 29 29 30 30 31 class TestTextFormatterLineno: 31 class TestTextFormatterLineno: 32 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 32 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 33 formatter = shakespeare.format.TextFormatterLineno( fileobj)33 formatter = shakespeare.format.TextFormatterLineno() 34 exp = u'''<pre id="0">0 Blah \xe6</pre> 34 exp = u'''<pre id="0">0 Blah \xe6</pre> 35 <pre id="1">1 blah &amp; blah</pre> 35 <pre id="1">1 blah &amp; blah</pre> 36 ''' 36 ''' 37 37 38 def test_format(self): 38 def test_format(self): 39 out = self.formatter.format( )39 out = self.formatter.format(self.fileobj) 40 assert out == self.exp 40 assert out == self.exp 
41 41 42 42 43 class TestTextFormatterAnnotate: 43 class TestTextFormatterAnnotate: 44 44 45 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 45 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 46 formatter = shakespeare.format.TextFormatterAnnotate(fileobj) 46 formatter = shakespeare.format.TextFormatterAnnotate() 47 exp = u''' 48 <div id="m2" class="hentry"> 49 <h3 class="entry-title">Test Stuff</h3> 50 <div class="entry-content"> 51 <pre id="0">0 Blah \xe6</pre> 52 <pre id="1">1 blah & blah</pre> 53 54 </div><!-- /entry-content --> 55 <p class="metadata"> 56 <a rel="bookmark" href="http://localhost:8080/#m2">#</a> 57 <span class="author">Nemo</span> 58 </p> 59 <div class="notes"> 60 <button class="createAnnotation" onclick="createAnnotation('m2',true)" title="Click here to create an annotation">></button> 61 <ol> 62 <li></li> 63 </ol> 64 </div><!-- /notes --> 65 </div><!-- /hentry --> 66 ''' 67 47 68 def test_format(self): 48 def test_format(self): 69 out = self.formatter.format() 49 self.fileobj.seek(0) 50 page_url = 'http://somethingelse.com/' 51 newtitle = 'New Title' 52 out = self.formatter.format( 53 self.fileobj, 54 page_uri=page_url, 55 title=newtitle, 56 ) 70 print '"%s"' % out.encode('utf-8') 57 print '"%s"' % out.encode('utf-8') 71 print '"%s"' % self.exp.encode('utf-8')58 assert page_url in out 72 assert out == self.exp59 assert newtitle in out 73 60 assert TestTextFormatterLineno.exp in out 74 def test_valid_xml(self):61 # test valid xml 75 import genshi 62 import genshi 76 outxml = genshi.XML( self.exp)63 outxml = genshi.XML(out) 77 64 78 65 79 def test_text_format(): 66 def test_text_format(): 80 formatlist = [ ('plain', TestTextFormatterPlain), 67 formatlist = [ ('plain', TestTextFormatterPlain), 81 ('lineno', TestTextFormatterLineno), 68 ('lineno', TestTextFormatterLineno), 82 ('annotate', TestTextFormatterAnnotate),83 ] 69 ] 84 for item in formatlist: 70 for item in formatlist: 85 fileobj = 
StringIO.StringIO(starttext.encode('utf-8')) 71 fileobj = StringIO.StringIO(starttext.encode('utf-8')) 86 tout = shakespeare.format.format_text(fileobj, item[0]) 72 tout = shakespeare.format.format_text(fileobj, item[0]) 87 assert tout == item[1].exp 73 assert tout == item[1].exp 88 74 trunk/src/shakespeare/template/view_annotate.html
Revision 113 Revision 119 1 <html xmlns:py="http://genshi.edgewall.org/" 1 <html xmlns:py="http://genshi.edgewall.org/" 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 2 xmlns:xi="http://www.w3.org/2001/XInclude"> 3 3 4 <py:def function="page_title">View Works - Annotate</py:def> 4 <py:def function="page_title">View Works - Annotate</py:def> 5 <head> 5 <head> 6 <py:def function="page_specific_css"> 6 <py:def function="page_specific_css"> 7 ${marginalia_media} 7 ${marginalia_media} 8 <style type="text/css" py:def="page_specific_css">9 body10 {11 margin: 0;12 padding: 0;13 }14 15 div.frame16 {17 height: 90%;18 overflow: auto;19 float: left;20 padding: 1em;21 }22 23 div.singleview24 {25 padding: 2em;26 padding-left: 10%;27 padding-right: 10%;28 }29 30 /* ensure we take up the whole width */31 div.multiview32 {33 width: 100%;34 height: 90%;35 }36 </style>37 </py:def> 8 </py:def> 38 </head> 9 </head> 39 10 40 <div py:match="content"> 11 <div py:match="content"> 41 <div id="annotation-controls"> 12 <div id="annotation-buttons"> 42 <form> 13 ${annotation_buttons} 43 <input type="button" onclick='showAllAnnotations( "http://localhost:8080/#*")' value="Show Annotations" /><br /> 44 <input type="button" onclick='hideAllAnnotations( "http://localhost:8080/#*")' value="Hide Annotations" /><br /> 45 </form> 46 </div> 14 </div> 47 <div py:if="len(texts) == 1" class="singleview"> 15 <div> 48 ${texts[0]} 16 ${text_with_annotation} 49 </div> 50 <div class="multiview" py:if="len(texts) > 1" > 51 <div py:for="text in texts" class="frame" 52 style="width: ${frame_width}%;" > 53 ${text} 54 </div> 55 </div> 17 </div> 56 </div> 18 </div> 57 19 58 <xi:include href="layout.html" /> 20 <xi:include href="layout.html" /> 59 </html> 21 </html> trunk/src/shakespeare/wsgiplain.py
Revision 109 Revision 119 1 """ 1 """ 2 Web interface to view and analyze shakespeare texts. 2 Web interface to view and analyze shakespeare texts. 3 """ 3 """ 4 import os 4 import os 5 import wsgiref.util 5 6 6 import paste.request 7 import paste.request 7 import genshi 8 import genshi 8 import genshi.template 9 import genshi.template 9 10 10 import shakespeare 11 import shakespeare 11 import shakespeare.index 12 import shakespeare.index 12 import shakespeare.format 13 import shakespeare.format 13 import shakespeare.concordance 14 import shakespeare.concordance 14 import shakespeare.dm 15 import shakespeare.dm 15 16 16 # import this after dm so that db connection is set 17 # import this after dm so that db connection is set 17 import annotater.store 18 import annotater.store 18 import annotater.marginalia 19 import annotater.marginalia 19 20 20 21 21 cfg = shakespeare.conf() 22 cfg = shakespeare.conf() 22 template_path = cfg.get('web', 'template_dir') 23 template_path = cfg.get('web', 'template_dir') 23 template_loader = genshi.template.TemplateLoader([template_path], 24 template_loader = genshi.template.TemplateLoader([template_path], 24 auto_reload=True) 25 auto_reload=True) 25 26 26 class ShakespeareWebInterface(object): 27 class ShakespeareWebInterface(object): 27 28 28 def response(self, result): 29 def response(self, result): 29 status = '200 OK' 30 status = '200 OK' 30 headers = [('Content-type','text/html')] 31 headers = [('Content-type','text/html')] 31 self.start_response(status, headers) 32 self.start_response(status, headers) 32 return [result] 33 return [result] 33 34 34 def __call__(self, environ, start_response): 35 def __call__(self, environ, start_response): 35 self.path = environ['PATH_INFO'] 36 self.path = environ['PATH_INFO'] 37 self.environ = environ 36 self.start_response = start_response 38 self.start_response = start_response 37 self.queryinfo = paste.request.parse_formvars(environ) 39 self.queryinfo = paste.request.parse_formvars(environ) 
38 if self.path == '/': 40 if self.path == '/': 39 return self.index() 41 return self.index() 40 elif self.path.startswith('/guide'): 42 elif self.path.startswith('/guide'): 41 return self.guide() 43 return self.guide() 42 elif self.path.startswith('/view'): 44 elif self.path.startswith('/view'): 43 name = self.queryinfo.get('name', '') 45 name = self.queryinfo.get('name', '') 44 format = self.queryinfo.get('format', 'plain') 46 format = self.queryinfo.get('format', 'plain') 45 return self.view(name, format) 47 return self.view(name, format) 46 elif self.path.startswith('/concordance/word'): # order matters 48 elif self.path.startswith('/concordance/word'): # order matters 47 word = self.queryinfo.get('word', None) 49 word = self.queryinfo.get('word', None) 48 return self.concordance_word(word) 50 return self.concordance_word(word) 49 elif self.path.startswith('/concordance'): 51 elif self.path.startswith('/concordance'): 50 return self.concordance_index() 52 return self.concordance_index() 51 elif self.path.startswith('/annotation'): 53 elif self.path.startswith('/annotation'): 52 store = annotater.store.AnnotaterStore() 54 store = annotater.store.AnnotaterStore() 53 return store(environ, start_response) 55 return store(environ, start_response) 54 elif self.path.startswith('/marginalia'): 56 elif self.path.startswith('/marginalia'): 55 prefix = cfg.get('annotater', 'marginalia_prefix') 57 prefix = cfg.get('annotater', 'marginalia_prefix') 56 media_app = annotater.marginalia.MarginaliaMedia(prefix) 58 media_app = annotater.marginalia.MarginaliaMedia(prefix) 57 return media_app(environ, start_response) 59 return media_app(environ, start_response) 58 else: 60 else: 59 # change to 404 or similar 61 # change to 404 or similar 60 return self.response('Error') 62 return self.response('Error') 61 63 62 def index(self): 64 def index(self): 63 try: 65 try: 64 index = shakespeare.index.all 66 index = shakespeare.index.all 65 tmpl = template_loader.load('index.html') 67 tmpl 
= template_loader.load('index.html') 66 result = tmpl.generate(works_index=index).render() 68 result = tmpl.generate(works_index=index).render() 67 except Exception, inst: 69 except Exception, inst: 68 result = '<p><strong>There was an error: ' + str(inst) + '</strong></p>' 70 result = '<p><strong>There was an error: ' + str(inst) + '</strong></p>' 69 return self.response(result) 71 return self.response(result) 70 72 71 def guide(self): 73 def guide(self): 72 template = template_loader.load('guide.html') 74 template = template_loader.load('guide.html') 73 result = template.generate().render() 75 result = template.generate().render() 74 return self.response(result) 76 return self.response(result) 75 77 76 def view(self, name, format='plain'): 78 def view(self, name, format='plain'): 79 if format == 'annotate': 80 return self.view_annotate(name) 77 import shakespeare.dm 81 import shakespeare.dm 78 namelist = name.split() 82 namelist = name.split() 79 numtexts = len(namelist) 83 numtexts = len(namelist) 80 textlist = [shakespeare.dm.Material.byName(tname) for tname in namelist] 84 textlist = [shakespeare.dm.Material.byName(tname) for tname in namelist] 81 # special case (only return the first text) 85 # special case (only return the first text) 82 if format == 'raw': 86 if format == 'raw': 83 tpath = textlist[0].get_cache_path('plain') 87 tpath = textlist[0].get_cache_path('plain') 84 result = file(tpath).read() 88 result = file(tpath).read() 85 status = '200 OK' 89 status = '200 OK' 86 headers = [('Content-type','text/plain')] 90 headers = [('Content-type','text/plain')] 87 self.start_response(status, headers) 91 self.start_response(status, headers) 88 return [result] 92 return [result] 89 texts = [] 93 texts = [] 90 for item in textlist: 94 for item in textlist: 91 tpath = item.get_cache_path('plain') 95 tpath = item.get_cache_path('plain') 92 tfileobj = file(tpath) 96 tfileobj = file(tpath) 93 ttext = shakespeare.format.format_text(tfileobj, format) 97 ttext = 
shakespeare.format.format_text(tfileobj, format) 94 thtml = genshi.XML(ttext) 98 thtml = genshi.XML(ttext) 95 texts.append(thtml) 99 texts.append(thtml) 96 # would have assumed this would be 100.0/numtexts but for some reason 100 # would have assumed this would be 100.0/numtexts but for some reason 97 # you need to allow more room (maybe because of the scrollbars?) 101 # you need to allow more room (maybe because of the scrollbars?) 98 # result is not consistent across browsers ... 102 # result is not consistent across browsers ... 99 frame_width = 100.0/numtexts - 4.0 103 frame_width = 100.0/numtexts - 4.0 100 if format == 'annotate': 104 template = template_loader.load('view.html') 101 template = template_loader.load('view_annotate.html') 105 result = template.generate(frame_width=frame_width, texts=texts) 102 prefix = cfg.get('annotater', 'marginalia_prefix') 103 ## TODO: remove hardcoded application fqdn 104 marginalia_media = annotater.marginalia.get_media_header(prefix, 105 'http://localhost:8080/') 106 marginalia_media = genshi.HTML(marginalia_media) 107 result = template.generate( 108 frame_width=frame_width, 109 texts=texts, 110 marginalia_media=marginalia_media, 111 ) 112 else: 113 template = template_loader.load('view.html') 114 result = template.generate(frame_width=frame_width, texts=texts) 115 # set to not strip whitespace as o/w whitespace in pre tag gets removed 106 # set to not strip whitespace as o/w whitespace in pre tag gets removed 107 return self.response(result.render('html', strip_whitespace=False)) 108 109 def view_annotate(self, name): 110 import shakespeare.dm 111 # only one name here ... 
112 textobj = shakespeare.dm.Material.byName(name) 113 tpath = textobj.get_cache_path('plain') 114 tfileobj = file(tpath) 115 formatter = shakespeare.format.TextFormatterAnnotate() 116 # not perfect in that we might have the application mounted somewhere 117 annotation_store_fqdn = wsgiref.util.application_uri(self.environ) 118 page_url = wsgiref.util.request_uri(self.environ) 119 ttext = formatter.format(tfileobj, page_uri=page_url) 120 thtml = genshi.HTML(ttext) 121 122 template = template_loader.load('view_annotate.html') 123 prefix = cfg.get('annotater', 'marginalia_prefix') 124 marginalia_media = annotater.marginalia.get_media_header(prefix, 125 annotation_store_fqdn, 126 page_url) 127 buttons = annotater.marginalia.get_buttons(page_url) 128 marginalia_media = genshi.HTML(marginalia_media) 129 buttons = genshi.HTML(buttons) 130 result = template.generate( 131 text_with_annotation=thtml, 132 marginalia_media=marginalia_media, 133 annotation_buttons=buttons, 134 ) 116 return self.response(result.render('html', strip_whitespace=False)) 135 return self.response(result.render('html', strip_whitespace=False)) 117 136 118 def concordance_index(self): 137 def concordance_index(self): 119 stats = shakespeare.concordance.Statistics() 138 stats = shakespeare.concordance.Statistics() 120 words = stats.keys() 139 words = stats.keys() 121 template = template_loader.load('concordance.html') 140 template = template_loader.load('concordance.html') 122 result = template.generate(words=words) 141 result = template.generate(words=words) 123 return self.response(result.render()) 142 return self.response(result.render()) 124 143 125 def concordance_word(self, word=None): 144 def concordance_word(self, word=None): 126 # TODO: sort by work etc 145 # TODO: sort by work etc 127 import shakespeare.textutils 146 import shakespeare.textutils 128 refs = [] 147 refs = [] 129 cc = shakespeare.concordance.Concordance() 148 cc = shakespeare.concordance.Concordance() 130 if word is not None: 
149 if word is not None: 131 refs = list(cc.get(word)) 150 refs = list(cc.get(word)) 132 newrefs = [] 151 newrefs = [] 133 for ref in refs: 152 for ref in refs: 134 # we use the 'plain' format when building the concordance 153 # we use the 'plain' format when building the concordance 135 tpath = ref.text.get_cache_path('plain') 154 tpath = ref.text.get_cache_path('plain') 136 ff = file(tpath) 155 ff = file(tpath) 137 snippet = shakespeare.textutils.get_snippet(ff, ref.char_index) 156 snippet = shakespeare.textutils.get_snippet(ff, ref.char_index) 138 ref.snippet = snippet 157 ref.snippet = snippet 139 template = template_loader.load('concordance_by_word.html') 158 template = template_loader.load('concordance_by_word.html') 140 result = template.generate(word=word, refs=refs) 159 result = template.generate(word=word, refs=refs) 141 return self.response(result.render()) 160 return self.response(result.render()) 142 161
