Changeset 72
- Timestamp:
- 12/27/06 17:09:08 (3 years ago)
- Location:
- trunk/src/shakespeare
- Files:
-
- 4 modified
-
concordance.py (modified) (7 diffs)
-
concordance_test.py (modified) (4 diffs)
-
dm.py (modified) (2 diffs)
-
dm_test.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/shakespeare/concordance.py
r51 r72 22 22 """ 23 23 sqlcc = shakespeare.dm.Concordance 24 sqlstat = shakespeare.dm.Statistic 24 25 25 26 def __init__(self, filter_names=None): … … 29 30 """ 30 31 self._filter_names = filter_names 31 # piece of sql to use in select to filter texts 32 self._sql_filter = True 32 self.sqlcc_filter = self._make_filter(self.sqlcc) 33 self.sqlstat_filter = self._make_filter(self.sqlstat) 34 35 def _make_filter(self, sqlobj): 36 sql_filter = True 33 37 if self._filter_names is not None: 34 38 arglist = [] 35 39 for name in self._filter_names: 36 newarg = self.sqlcc.q.textID == self._name2id(name) 40 newarg = sqlobj.q.textID == self._name2id(name) 37 41 arglist.append(newarg) 38 self._sql_filter = sqlobject.OR(*arglist) 42 sql_filter = sqlobject.OR(*arglist) 43 return sql_filter 39 44 40 45 def _name2id(self, name): … … 42 47 43 48 def keys(self): 44 """Return list of words in concordance 49 """Return list of *distinct* words in concordance/statistics 45 50 """ 46 # distinct does not help us because we need to DISTINCT word 47 # but can't do this with sqlobject 48 all = self.sqlcc.select(self._sql_filter, 49 orderBy=self.sqlcc.q.word, 50 distinct=True) 51 all = self.sqlstat.select(self.sqlstat_filter, 52 orderBy=self.sqlstat.q.word, 53 ) 51 54 words = [ xx.word for xx in list(all) ] 52 55 distinct = list(set(words)) … … 63 66 @return: sqlobject query list 64 67 """ 65 select = self.sqlcc.select(sqlobject.AND(self._sql_filter, self.sqlcc.q.word==word)) 68 select = self.sqlcc.select(sqlobject.AND(self.sqlcc_filter, self.sqlcc.q.word==word)) 66 69 return select 67 70 … … 69 72 70 73 def get(self, word): 71 select = self.sqlcc.select( 72 sqlobject.AND(self._sql_filter, self.sqlcc.q.word==word) 74 select = self.sqlstat.select( 75 sqlobject.AND(self.sqlstat_filter, self.sqlstat.q.word==word) 73 76 ) 74 return select.count() 77 total = 0 78 for stat in select: 79 total += stat.occurrences 80 return total 75 81 76 82 class ConcordanceBuilder(object): … … 111 117 lineCount = 0 112 118 charIndex = 0 119 stats = {} 113 120 trans = shakespeare.dm.Concordance._connection.transaction() 114 121 for line in text.readlines(): … … 122 129 line=lineCount, 123 130 char_index=charIndex+match.start()) 131 stats[word] = stats.get(word, 0) + 1 124 132 lineCount += 1 125 133 charIndex += len(line) 126 134 trans.commit() 135 trans = shakespeare.dm.Concordance._connection.transaction() 136 for word, value in stats.items(): 137 tresults = shakespeare.dm.Statistic.select( 138 sqlobject.AND( 139 shakespeare.dm.Statistic.q.textID == dmText.id, 140 shakespeare.dm.Statistic.q.word == word 141 )) 142 try: 143 dbstat = list(tresults)[0] 144 dbstat.occurrences += value 145 except: 146 shakespeare.dm.Statistic( 147 connection=trans, 148 text=dmText, 149 word=word, 150 occurrences=value 151 ) 152 trans.commit() 153 127 154 128 155 def remove_text(self, name): -
trunk/src/shakespeare/concordance_test.py
r40 r72 15 15 I had in charge at my depart for France, 16 16 As procurator to your excellence, 17 A fake imperial line. 17 18 """ 18 19 name = 'test-concordance' … … 20 21 21 22 # ['work_id', 'line-no', 'character-index'] } 23 # incomplete 22 24 expConcordance = { 23 'fake' : [ (name, 0, 2), (name, 0, 7) ], 25 'fake' : [ (name, 0, 2), (name, 0, 7), (name, 5, 136) ], 24 26 'suffolk' : [ (name, 1, 17), ], 25 27 'high' : [ (name, 2, 37), ], … … 27 29 } 28 30 31 # incomplete 29 32 expStats = { 30 'fake' : 2, 33 'fake' : 3, 34 'imperial' : 2, 31 35 'suffolk' : 1, 32 36 'high' : 1, … … 62 66 for key, value in self.expConcordance.items(): 63 67 listing = list(self.concordance.get(key)) 64 listing.reverse() 65 out = [ (xx.text.name, xx.line, xx.char_index) for xx in listing ] 66 assert out ==value 68 assert len(listing) == len(value) 69 for xx in listing: 70 assert (xx.text.name, xx.line, xx.char_index) in value 67 71 68 72 def test_stats(self): 69 73 for key, value in self.expStats.items(): 70 74 out = self.statistics.get(key) 75 print key 71 76 assert out == value 72 77 -
trunk/src/shakespeare/dm.py
r51 r72 20 20 Material.createTable(ifNotExists=True) 21 21 Concordance.createTable(ifNotExists=True) 22 Statistic.createTable(ifNotExists=True) 22 23 23 24 def cleandb(): 25 Statistic.dropTable(ifExists=True) 24 26 Concordance.dropTable(ifExists=True) 25 27 Material.dropTable(ifExists=True) … … 63 65 text_index = sqlobject.DatabaseIndex('text') 64 66 67 class Statistic(sqlobject.SQLObject): 68 69 text = sqlobject.ForeignKey('Material') 70 word = sqlobject.StringCol(length=50) 71 occurrences = sqlobject.IntCol(default=1) 72 73 word_index = sqlobject.DatabaseIndex('word') 74 text_index = sqlobject.DatabaseIndex('text') 75 65 76 66 77 # auto create db tables on import -
trunk/src/shakespeare/dm_test.py
r51 r72 1 import sqlobject 2 1 3 import shakespeare.dm 2 4 … … 47 49 assert self.text == out1.text 48 50 51 class TestStatistic: 52 53 def setup_class(self): 54 self.name = 'test-123' 55 self.title = 'Hamlet' 56 self.text = shakespeare.dm.Material(name=self.name, title=self.title) 57 self.word = 'jones' 58 self.occurrences = 5 59 self.cc1 = shakespeare.dm.Statistic( 60 text=self.text, 61 word=self.word, 62 occurrences=self.occurrences 63 ) 64 65 def teardown_class(self): 66 shakespeare.dm.Statistic.delete(self.cc1.id) 67 shakespeare.dm.Material.delete(self.text.id) 68 69 def test1(self): 70 out1 = shakespeare.dm.Statistic.get(self.cc1.id) 71 assert self.text == out1.text 72 assert out1.occurrences == self.occurrences 73 74 def test_select(self): 75 tresults = shakespeare.dm.Statistic.select( 76 sqlobject.AND( 77 shakespeare.dm.Statistic.q.textID == self.text.id, 78 shakespeare.dm.Statistic.q.word == self.word, 79 )) 80 num = tresults.count() 81 assert num == 1 82
