Changeset 74
- Timestamp:
- 12/27/06 18:02:06 (3 years ago)
- Location:
- trunk/src/shakespeare
- Files:
-
- 2 modified
-
concordance.py (modified) (3 diffs)
-
concordance_test.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/shakespeare/concordance.py
 r72  r74
  89   89
  90   90       words_to_ignore = [
  91      -         # 'a', 'the', 'and', 'as', 'are', 'be', 'but', ' d', 'in'
       91 +         # 'a', 'the', 'and', 'as', 'are', 'be', 'but', 'in'
  92   92       ]
       93 +     non_words = [
       94 +         'd', # accus'd
       95 +         't',
       96 +     ]
       97 +
       98 +     def is_roman_numeral(self, word):
       99 +         digits = [ 'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix' ]
      100 +         others = [ 'l', 'x', 'c' ]
      101 +         if word == 'i': return False # exception because this conflicts with I
      102 +         while word[0] in others:
      103 +             if len(word) == 1:
      104 +                 return True
      105 +             else:
      106 +                 word = word[1:]
      107 +         return word in digits
      108 +
      109 +     def ignore_word(self, word):
      110 +         "Return True if this word should not be added to the concordance."
      111 +         bool1 = word in self.words_to_ignore
      112 +         bool2 = word in self.non_words
      113 +         # do roman numerals
      114 +         bool3 = self.is_roman_numeral(word)
      115 +         return bool1 or bool2 or bool3
  93  116
  94  117       def _text_already_done(self, text):
   …    …
 122  145           for match in self.word_regex.finditer(line):
 123  146               word = match.group().lower() # case insensitive
 124      -             if word in self.words_to_ignore:
      147 +             if self.ignore_word(word):
 125  148                   continue
 126  149               shakespeare.dm.Concordance(connection=trans,
   …    …
 164  187           for rec in recs:
 165  188               shakespeare.dm.Concordance.delete(rec.id)
      189 +         stats = shakespeare.dm.Statistic.select(
      190 +             shakespeare.dm.Statistic.q.textID==dmText.id
      191 +         )
      192 +         for stat in stats:
      193 +             shakespeare.dm.Statistic.delete(stat.id)
 166  194
-
trunk/src/shakespeare/concordance_test.py
 r72  r74
  63   63           assert exp == out
  64   64
       65 +     def test_is_roman_numeral(self):
       66 +         testvals = [ 'ii', 'v', 'vi', 'xi', 'xx', 'xxi', 'xlvi', 'c', 'cvi' ]
       67 +         for val in testvals:
       68 +             assert self.builder.is_roman_numeral(val)
       69 +
       70 +     def test_ignore_word(self):
       71 +         testvals = [ 'd', 't' ]
       72 +         for val in testvals:
       73 +             assert self.builder.ignore_word(val)
       74 +
  65   75       def test_concordance(self):
  66   76           for key, value in self.expConcordance.items():
