Changeset 72

Show
Ignore:
Timestamp:
12/27/06 18:09:08 (2 years ago)
Author:
rgrp
Message:

Create Statistic domain object separate from the Concordance object to try and improve performance.

Generating the concordance page is very slow. Tracked this down to (a) generation of the word list (which involved iterating over every item in the concordance table) and (b) calculating statistics from the concordance table using count(*).

By creating a dedicated Statistic object holding occurrences per text and word, I hoped to improve both of these. With respect to item (a), there was a fairly good speed improvement, from 23s to ~3s on my Mac OS X machine using the sonnets as the corpus. On (b) there was not much improvement, as we still have to do one db read per word and count(*) is pretty efficient (the cost using SQLObject is all the db reads, not the original db query). On my local machine it still takes ~30s to load the concordance page :( -- looks like caching the HTML may be the simplest way forward.

  • trunk/src/shakespeare/dm.py: add Statistic domain object
  • trunk/src/shakespeare/dm_test.py: add relevant tests
  • trunk/src/shakespeare/concordance.py:
    • ConcordanceBuilder.add_text: adapted it to write values into Statistic object
    • ConcordanceBase.keys(): use Statistic objects to get word lists rather than Concordance objects
    • Statistics.get: use Statistic rather than Concordance (ironically was simpler when using Concordance)
    • make various related changes (_sql_filter -> sqlcc_filter and sqlstat_filter etc)
  • trunk/src/shakespeare/concordance_test.py: no new tests but some minor fixes to old ones (*not* related to the other changes though)

Previously had a Statistics object in shakespeare.concordance

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/src/shakespeare/concordance.py

    Revision 51 Revision 72
    1""" 1""" 
    2Concordance (and statistics) for texts in database. 2Concordance (and statistics) for texts in database. 
    3 3 
    4To build concordance use ConcordanceBuilder.  To access concordance/statistics 4To build concordance use ConcordanceBuilder.  To access concordance/statistics 
    5use Concordance/Statistics class.  Concordance and statistics are provided as 5use Concordance/Statistics class.  Concordance and statistics are provided as 
    6dictionaries keyed by words. 6dictionaries keyed by words. 
    7 7 
    8NB: all word keys have been lower-cased in order to render them 8NB: all word keys have been lower-cased in order to render them 
    9case-insensitive 9case-insensitive 
    10""" 10""" 
    11import re 11import re 
    12 12 
    13import sqlobject 13import sqlobject 
    14 14 
    15import shakespeare.index 15import shakespeare.index 
    16import shakespeare.cache 16import shakespeare.cache 
    17 17 
    18 18 
    19class ConcordanceBase(object): 19class ConcordanceBase(object): 
    20    """ 20    """ 
    21    TODO: caching?? 21    TODO: caching?? 
    22    """ 22    """ 
    23    sqlcc = shakespeare.dm.Concordance 23    sqlcc = shakespeare.dm.Concordance 
      24    sqlstat = shakespeare.dm.Statistic 
    24 25 
    25    def __init__(self, filter_names=None): 26    def __init__(self, filter_names=None): 
    26        """ 27        """ 
    27        @param filter_names: a list of id names with which to filter results 28        @param filter_names: a list of id names with which to filter results 
    28            (i.e. only return results relating to those texts) 29            (i.e. only return results relating to those texts) 
    29        """ 30        """ 
    30        self._filter_names = filter_names 31        self._filter_names = filter_names 
    31        # piece of sql to use in select to filter texts  32         self.sqlcc_filter = self._make_filter(self.sqlcc) 
    32        self._sql_filter = True  33         self.sqlstat_filter = self._make_filter(self.sqlstat) 
       34  
       35     def _make_filter(self, sqlobj): 
       36         sql_filter = True 
    33        if self._filter_names is not None: 37        if self._filter_names is not None: 
    34            arglist = [] 38            arglist = [] 
    35            for name in self._filter_names: 39            for name in self._filter_names: 
    36                newarg = self.sqlcc.q.textID == self._name2id(name) 40                newarg = sqlobj.q.textID == self._name2id(name) 
    37                arglist.append(newarg) 41                arglist.append(newarg) 
    38            self._sql_filter = sqlobject.OR(*arglist)  42             sql_filter = sqlobject.OR(*arglist) 
       43         return sql_filter 
    39     44     
    40    def _name2id(self, name): 45    def _name2id(self, name): 
    41        return shakespeare.dm.Material.byName(name).id 46        return shakespeare.dm.Material.byName(name).id 
    42 47 
    43    def keys(self): 48    def keys(self): 
    44        """Return list of words in concordance 49        """Return list of *distinct* words in concordance/statistics 
    45        """ 50        """ 
    46        # distinct does not help us because we need to DISTINCT word 51        all = self.sqlstat.select(self.sqlstat_filter, 
    47        # but can't do this with sqlobject 52                           orderBy=self.sqlstat.q.word, 
    48        all = self.sqlcc.select(self._sql_filter, 53                           ) 
    49                           orderBy=self.sqlcc.q.word,   
    50                           distinct=True)   
    51        words = [ xx.word for xx in list(all) ] 54        words = [ xx.word for xx in list(all) ] 
    52        distinct = list(set(words)) 55        distinct = list(set(words)) 
    53        distinct.sort() 56        distinct.sort() 
    54        return distinct 57        return distinct 
    55 58 
    56 59 
    57class Concordance(ConcordanceBase): 60class Concordance(ConcordanceBase): 
    58    """Concordance by word for a set of texts 61    """Concordance by word for a set of texts 
    59    """ 62    """ 
    60 63 
    61    def get(self, word): 64    def get(self, word): 
    62        """Get list of occurrences for word 65        """Get list of occurrences for word 
    63        @return: sqlobject query list  66        @return: sqlobject query list  
    64        """ 67        """ 
    65        select = self.sqlcc.select(sqlobject.AND(self._sql_filter, self.sqlcc.q.word==word)) 68        select = self.sqlcc.select(sqlobject.AND(self.sqlcc_filter, self.sqlcc.q.word==word)) 
    66        return select 69        return select 
    67 70 
    68class Statistics(ConcordanceBase): 71class Statistics(ConcordanceBase): 
    69 72 
    70    def get(self, word): 73    def get(self, word): 
    71        select = self.sqlcc.select( 74        select = self.sqlstat.select( 
    72            sqlobject.AND(self._sql_filter, self.sqlcc.q.word==word) 75            sqlobject.AND(self.sqlstat_filter, self.sqlstat.q.word==word) 
    73            ) 76            ) 
    74        return select.count()  77         total = 0 
       78         for stat in select: 
       79             total += stat.occurrences 
       80         return total 
    75 81 
    76class ConcordanceBuilder(object): 82class ConcordanceBuilder(object): 
    77    """Build a concordance and associated statistics for a set of texts. 83    """Build a concordance and associated statistics for a set of texts. 
    78     84     
    79    """ 85    """ 
    80 86 
    81    # multiline, unicode and ignorecase 87    # multiline, unicode and ignorecase 
    82    word_regex = re.compile(r'\b(\w+)\b', re.U | re.M | re.I) 88    word_regex = re.compile(r'\b(\w+)\b', re.U | re.M | re.I) 
    83 89 
    84    words_to_ignore = [  90    words_to_ignore = [  
    85        # 'a', 'the', 'and', 'as', 'are', 'be', 'but', 'd', 'in' 91        # 'a', 'the', 'and', 'as', 'are', 'be', 'but', 'd', 'in' 
    86                        ] 92                        ] 
    87 93 
    88    def _text_already_done(self, text): 94    def _text_already_done(self, text): 
    89        numrecs = shakespeare.dm.Concordance.select( 95        numrecs = shakespeare.dm.Concordance.select( 
    90                shakespeare.dm.Concordance.q.textID==text.id 96                shakespeare.dm.Concordance.q.textID==text.id 
    91                ).count() 97                ).count() 
    92        return numrecs > 0 98        return numrecs > 0 
    93 99 
    94    def add_text(self, name, text=None): 100    def add_text(self, name, text=None): 
    95        """Add a text to the concordance. 101        """Add a text to the concordance. 
    96        @param name: name of text to add 102        @param name: name of text to add 
    97        @param text: [optional] a file-like object containing text data. If not 103        @param text: [optional] a file-like object containing text data. If not 
    98            provided will default to using file in cache associated with named 104            provided will default to using file in cache associated with named 
    99            text 105            text 
    100        """ 106        """ 
    101        dmText = shakespeare.dm.Material.byName(name) 107        dmText = shakespeare.dm.Material.byName(name) 
    102        if self._text_already_done(dmText): 108        if self._text_already_done(dmText): 
    103            msg = 'Have already added to concordance text: %s' % dmText 109            msg = 'Have already added to concordance text: %s' % dmText 
    104            # raise ValueError(msg) 110            # raise ValueError(msg) 
    105            print msg 111            print msg 
    106            print 'Skipping' 112            print 'Skipping' 
    107            return 113            return 
    108        if text is None: 114        if text is None: 
    109            tpath = dmText.get_cache_path('plain') 115            tpath = dmText.get_cache_path('plain') 
    110            text = file(tpath) 116            text = file(tpath) 
    111        lineCount = 0 117        lineCount = 0 
    112        charIndex = 0 118        charIndex = 0 
      119        stats = {} 
    113        trans = shakespeare.dm.Concordance._connection.transaction() 120        trans = shakespeare.dm.Concordance._connection.transaction() 
    114        for line in text.readlines(): 121        for line in text.readlines(): 
    115            for match in self.word_regex.finditer(line): 122            for match in self.word_regex.finditer(line): 
    116                word = match.group().lower() # case insensitive 123                word = match.group().lower() # case insensitive 
    117                if word in self.words_to_ignore: 124                if word in self.words_to_ignore: 
    118                    continue 125                    continue 
    119                shakespeare.dm.Concordance(connection=trans, 126                shakespeare.dm.Concordance(connection=trans, 
    120                                           text=dmText, 127                                           text=dmText, 
    121                                           word=word, 128                                           word=word, 
    122                                           line=lineCount, 129                                           line=lineCount, 
    123                                           char_index=charIndex+match.start()) 130                                           char_index=charIndex+match.start()) 
      131                stats[word] = stats.get(word, 0) + 1 
    124            lineCount += 1 132            lineCount += 1 
    125            charIndex += len(line) 133            charIndex += len(line) 
    126        trans.commit() 134        trans.commit() 
      135        trans = shakespeare.dm.Concordance._connection.transaction() 
      136        for word, value in stats.items(): 
      137            tresults  = shakespeare.dm.Statistic.select( 
      138                sqlobject.AND( 
      139                    shakespeare.dm.Statistic.q.textID == dmText.id, 
      140                    shakespeare.dm.Statistic.q.word == word 
      141                    )) 
      142            try: 
      143                dbstat = list(tresults)[0] 
      144                dbstat.occurrences += value 
      145            except: 
      146                shakespeare.dm.Statistic( 
      147                        connection=trans, 
      148                        text=dmText, 
      149                        word=word, 
      150                        occurrences=value 
      151                        ) 
      152        trans.commit() 
      153 
    127 154 
    128    def remove_text(self, name): 155    def remove_text(self, name): 
    129        """Remove a text from the concordance. 156        """Remove a text from the concordance. 
    130 157 
    131        @param name: as for add_text 158        @param name: as for add_text 
    132        """ 159        """ 
    133        dmText = shakespeare.dm.Material.byName(name) 160        dmText = shakespeare.dm.Material.byName(name) 
    134        recs = shakespeare.dm.Concordance.select( 161        recs = shakespeare.dm.Concordance.select( 
    135                shakespeare.dm.Concordance.q.textID==dmText.id 162                shakespeare.dm.Concordance.q.textID==dmText.id 
    136                ) 163                ) 
    137        for rec in recs: 164        for rec in recs: 
    138            shakespeare.dm.Concordance.delete(rec.id) 165            shakespeare.dm.Concordance.delete(rec.id) 
    139 166 
  • trunk/src/shakespeare/concordance_test.py

    Revision 40 Revision 72
    1import unittest 1import unittest 
    2import StringIO 2import StringIO 
    3import tempfile 3import tempfile 
    4 4 
    5 5 
    6import shakespeare.index 6import shakespeare.index 
    7import shakespeare.concordance 7import shakespeare.concordance 
    8 8 
    9class TestConcordancer: 9class TestConcordancer: 
    10 10 
    11    inText = \ 11    inText = \ 
    12"""A fake fake line 12"""A fake fake line 
    13SUFFOLK. 13SUFFOLK. 
    14As by your high imperial Majesty 14As by your high imperial Majesty 
    15I had in charge at my depart for France, 15I had in charge at my depart for France, 
    16As procurator to your excellence, 16As procurator to your excellence, 
      17A fake imperial line. 
    17""" 18""" 
    18    name = 'test-concordance' 19    name = 'test-concordance' 
    19    title = 'Hamlet' 20    title = 'Hamlet' 
    20     21     
    21    # ['work_id', 'line-no', 'character-index'] } 22    # ['work_id', 'line-no', 'character-index'] } 
      23    # incomplete 
    22    expConcordance = { 24    expConcordance = { 
    23        'fake' : [ (name, 0, 2), (name, 0, 7) ], 25        'fake' : [ (name, 0, 2), (name, 0, 7), (name, 5, 136) ], 
    24        'suffolk' : [ (name, 1, 17), ], 26        'suffolk' : [ (name, 1, 17), ], 
    25        'high' : [ (name, 2, 37), ], 27        'high' : [ (name, 2, 37), ], 
    26        'word_that_is_not_there' : [], 28        'word_that_is_not_there' : [], 
    27        } 29        } 
    28 30 
      31    # incomplete 
    29    expStats = { 32    expStats = { 
    30        'fake' : 2,  33         'fake' : 3, 
       34         'imperial' : 2, 
    31        'suffolk' : 1, 35        'suffolk' : 1, 
    32        'high' : 1, 36        'high' : 1, 
    33        'word_that_is_not_there' : 0, 37        'word_that_is_not_there' : 0, 
    34        } 38        } 
    35 39 
    36    def setup_class(cls): 40    def setup_class(cls): 
    37        cls.builder = shakespeare.concordance.ConcordanceBuilder() 41        cls.builder = shakespeare.concordance.ConcordanceBuilder() 
    38        # try deleting it first so as to be more robust to errors 42        # try deleting it first so as to be more robust to errors 
    39        # does not seem to work with the class methods 43        # does not seem to work with the class methods 
    40        # cls.teardown_class(cls) 44        # cls.teardown_class(cls) 
    41        cls.text = shakespeare.dm.Material(name=cls.name, title=cls.title) 45        cls.text = shakespeare.dm.Material(name=cls.name, title=cls.title) 
    42        cls.builder.add_text(cls.name, StringIO.StringIO(cls.inText)) 46        cls.builder.add_text(cls.name, StringIO.StringIO(cls.inText)) 
    43        cls.concordance = shakespeare.concordance.Concordance([cls.name]) 47        cls.concordance = shakespeare.concordance.Concordance([cls.name]) 
    44        cls.statistics = shakespeare.concordance.Statistics([cls.name]) 48        cls.statistics = shakespeare.concordance.Statistics([cls.name]) 
    45 49 
    46    def teardown_class(cls): 50    def teardown_class(cls): 
    47        # allow us to deal with left over stuff from previous errors 51        # allow us to deal with left over stuff from previous errors 
    48        try: 52        try: 
    49            cls.builder.remove_text(cls.name) 53            cls.builder.remove_text(cls.name) 
    50            tmp = shakespeare.dm.Material.byName(cls.name) 54            tmp = shakespeare.dm.Material.byName(cls.name) 
    51            shakespeare.dm.Material.delete(tmp.id) 55            shakespeare.dm.Material.delete(tmp.id) 
    52        except: 56        except: 
    53            pass 57            pass 
    54 58 
    55    def test__process_line(self): 59    def test__process_line(self): 
    56        line = 'the - quick, brown. fox-jumped over$ the_lazy do8g.' 60        line = 'the - quick, brown. fox-jumped over$ the_lazy do8g.' 
    57        exp = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the_lazy', 'do8g' ] 61        exp = ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the_lazy', 'do8g' ] 
    58        out = self.builder.word_regex.findall(line) 62        out = self.builder.word_regex.findall(line) 
    59        assert exp == out 63        assert exp == out 
    60 64 
    61    def test_concordance(self): 65    def test_concordance(self): 
    62        for key, value in self.expConcordance.items(): 66        for key, value in self.expConcordance.items(): 
    63            listing = list(self.concordance.get(key)) 67            listing = list(self.concordance.get(key)) 
    64            listing.reverse() 68            assert len(listing) == len(value) 
    65            out = [ (xx.text.name, xx.line, xx.char_index) for xx in listing ] 69            for xx in listing: 
    66            assert out == value 70                assert (xx.text.name, xx.line, xx.char_index) in value 
    67 71 
    68    def test_stats(self): 72    def test_stats(self): 
    69        for key, value in self.expStats.items(): 73        for key, value in self.expStats.items(): 
    70            out = self.statistics.get(key) 74            out = self.statistics.get(key) 
      75            print key 
    71            assert out == value 76            assert out == value 
    72 77 
    73    def test_keys(self): 78    def test_keys(self): 
    74        words = self.concordance.keys() 79        words = self.concordance.keys() 
    75        assert 'a' == words[0] 80        assert 'a' == words[0] 
    76        assert 'your' == words[-1] 81        assert 'your' == words[-1] 
    77        assert 22 == len(words) 82        assert 22 == len(words) 
  • trunk/src/shakespeare/dm.py

    Revision 51 Revision 72
    1""" 1""" 
    2Domain model 2Domain model 
    3 3 
    4Material contains all data we have including shakespeare texts. A text is taken 4Material contains all data we have including shakespeare texts. A text is taken 
    5to be a specific version of a work. e.g. the 1623 folio of King Richard III. 5to be a specific version of a work. e.g. the 1623 folio of King Richard III. 
    6 6 
    7We may in future add a Work object to refer to 'abstract' work of which a given 7We may in future add a Work object to refer to 'abstract' work of which a given 
    8text is a version. 8text is a version. 
    9""" 9""" 
    10import sqlobject 10import sqlobject 
    11 11 
    12import shakespeare 12import shakespeare 
    13import shakespeare.cache 13import shakespeare.cache 
    14 14 
    15uri = shakespeare.conf().get('db', 'uri') 15uri = shakespeare.conf().get('db', 'uri') 
    16__connection__ = sqlobject.connectionForURI(uri) 16__connection__ = sqlobject.connectionForURI(uri) 
    17 17 
    18# note we run this at bottom of module to auto create db tables on import 18# note we run this at bottom of module to auto create db tables on import 
    19def createdb(): 19def createdb(): 
    20    Material.createTable(ifNotExists=True) 20    Material.createTable(ifNotExists=True) 
    21    Concordance.createTable(ifNotExists=True) 21    Concordance.createTable(ifNotExists=True) 
      22    Statistic.createTable(ifNotExists=True) 
    22 23 
    23def cleandb(): 24def cleandb(): 
      25    Statistic.dropTable(ifExists=True) 
    24    Concordance.dropTable(ifExists=True) 26    Concordance.dropTable(ifExists=True) 
    25    Material.dropTable(ifExists=True) 27    Material.dropTable(ifExists=True) 
    26 28 
    27def rebuilddb(): 29def rebuilddb(): 
    28    cleandb() 30    cleandb() 
    29    createdb() 31    createdb() 
    30 32 
    31class Material(sqlobject.SQLObject): 33class Material(sqlobject.SQLObject): 
    32    """Material related to Shakespeare (usually text of works and ancillary 34    """Material related to Shakespeare (usually text of works and ancillary 
    33    matter such as introductions). 35    matter such as introductions). 
    34 36 
    35    NB: can not use 'text' as class name as it is an sql reserved word 37    NB: can not use 'text' as class name as it is an sql reserved word 
    36 38 
    37    @attribute name: a unique name identifying the material 39    @attribute name: a unique name identifying the material 
    38     40     
    39    TODO: mutiple creators ?? 41    TODO: mutiple creators ?? 
    40    """ 42    """ 
    41     43     
    42    name = sqlobject.StringCol(alternateID=True) 44    name = sqlobject.StringCol(alternateID=True) 
    43    title = sqlobject.StringCol(default=None, length=255) 45    title = sqlobject.StringCol(default=None, length=255) 
    44    # creator rather than author to fit with dublin core 46    # creator rather than author to fit with dublin core 
    45    creator = sqlobject.StringCol(default=None, length=255) 47    creator = sqlobject.StringCol(default=None, length=255) 
    46    url = sqlobject.StringCol(default=None, length=255) 48    url = sqlobject.StringCol(default=None, length=255) 
    47    notes = sqlobject.StringCol(default=None) 49    notes = sqlobject.StringCol(default=None) 
    48 50 
    49    def get_cache_path(self, format): 51    def get_cache_path(self, format): 
    50        """Get path within cache to data file associated with this material. 52        """Get path within cache to data file associated with this material. 
    51        @format: the version ('plain', original='' etc) 53        @format: the version ('plain', original='' etc) 
    52        """ 54        """ 
    53        return shakespeare.cache.default.path(self.url, format) 55        return shakespeare.cache.default.path(self.url, format) 
    54 56 
    55class Concordance(sqlobject.SQLObject): 57class Concordance(sqlobject.SQLObject): 
    56 58 
    57    text = sqlobject.ForeignKey('Material') 59    text = sqlobject.ForeignKey('Material') 
    58    word = sqlobject.StringCol(length=50) 60    word = sqlobject.StringCol(length=50) 
    59    line = sqlobject.IntCol() 61    line = sqlobject.IntCol() 
    60    char_index = sqlobject.IntCol() 62    char_index = sqlobject.IntCol() 
    61 63 
    62    word_index = sqlobject.DatabaseIndex('word') 64    word_index = sqlobject.DatabaseIndex('word') 
    63    text_index = sqlobject.DatabaseIndex('text') 65    text_index = sqlobject.DatabaseIndex('text') 
    64 66 
      67class Statistic(sqlobject.SQLObject): 
      68 
      69    text = sqlobject.ForeignKey('Material') 
      70    word = sqlobject.StringCol(length=50) 
      71    occurrences = sqlobject.IntCol(default=1) 
      72 
      73    word_index = sqlobject.DatabaseIndex('word') 
      74    text_index = sqlobject.DatabaseIndex('text') 
      75 
    65 76 
    66# auto create db tables on import 77# auto create db tables on import 
    67createdb() 78createdb() 
    68 79 
  • trunk/src/shakespeare/dm_test.py

    Revision 51 Revision 72
      1import sqlobject 
      2 
    1import shakespeare.dm 3import shakespeare.dm 
    2 4 
    3class TestMaterial: 5class TestMaterial: 
    4 6 
    5    def setup_class(self): 7    def setup_class(self): 
    6        self.name = 'test-123' 8        self.name = 'test-123' 
    7        self.title = 'Hamlet' 9        self.title = 'Hamlet' 
    8        self.url = 'http://www.openshakespeare.org/blah.txt' 10        self.url = 'http://www.openshakespeare.org/blah.txt' 
    9        self.text = shakespeare.dm.Material(name=self.name, 11        self.text = shakespeare.dm.Material(name=self.name, 
    10                title=self.title, url=self.url) 12                title=self.title, url=self.url) 
    11 13 
    12    def teardown_class(self): 14    def teardown_class(self): 
    13        shakespeare.dm.Material.delete(self.text.id) 15        shakespeare.dm.Material.delete(self.text.id) 
    14     16     
    15    def test1(self): 17    def test1(self): 
    16        txtid = self.text.id 18        txtid = self.text.id 
    17        txt2 = shakespeare.dm.Material.get(txtid) 19        txt2 = shakespeare.dm.Material.get(txtid) 
    18        txt3 = shakespeare.dm.Material.byName(self.name) 20        txt3 = shakespeare.dm.Material.byName(self.name) 
    19        assert self.text.id == txt2.id 21        assert self.text.id == txt2.id 
    20        assert self.text.id == txt3.id 22        assert self.text.id == txt3.id 
    21     23     
    22    def test_get_cache_path(self): 24    def test_get_cache_path(self): 
    23        out = self.text.get_cache_path('plain') 25        out = self.text.get_cache_path('plain') 
    24        # do not want anything too specific or we end up duplicating cache_test 26        # do not want anything too specific or we end up duplicating cache_test 
    25        assert len(out) > 0 27        assert len(out) > 0 
    26 28 
    27class TestConcordance: 29class TestConcordance: 
    28 30 
    29    def setup_class(self): 31    def setup_class(self): 
    30        self.name = 'test-123' 32        self.name = 'test-123' 
    31        self.title = 'Hamlet' 33        self.title = 'Hamlet' 
    32        self.text = shakespeare.dm.Material(name=self.name, title=self.title) 34        self.text = shakespeare.dm.Material(name=self.name, title=self.title) 
    33        word = 'jones' 35        word = 'jones' 
    34        line = 20 36        line = 20 
    35        char_index = 500 37        char_index = 500 
    36        self.cc1 = shakespeare.dm.Concordance(text=self.text, 38        self.cc1 = shakespeare.dm.Concordance(text=self.text, 
    37                                         word=word, 39                                         word=word, 
    38                                         line=line, 40                                         line=line, 
    39                                         char_index=char_index) 41                                         char_index=char_index) 
    40 42 
    41    def teardown_class(self): 43    def teardown_class(self): 
    42        shakespeare.dm.Concordance.delete(self.cc1.id) 44        shakespeare.dm.Concordance.delete(self.cc1.id) 
    43        shakespeare.dm.Material.delete(self.text.id) 45        shakespeare.dm.Material.delete(self.text.id) 
    44 46 
    45    def test1(self): 47    def test1(self): 
    46        out1 = shakespeare.dm.Concordance.get(self.cc1.id) 48        out1 = shakespeare.dm.Concordance.get(self.cc1.id) 
    47        assert self.text == out1.text 49        assert self.text == out1.text 
    48 50 
      51class TestStatistic: 
      52 
      53    def setup_class(self): 
      54        self.name = 'test-123' 
      55        self.title = 'Hamlet' 
      56        self.text = shakespeare.dm.Material(name=self.name, title=self.title) 
      57        self.word = 'jones' 
      58        self.occurrences = 5 
      59        self.cc1 = shakespeare.dm.Statistic( 
      60                text=self.text, 
      61                word=self.word, 
      62                occurrences=self.occurrences 
      63                ) 
      64 
      65    def teardown_class(self): 
      66        shakespeare.dm.Statistic.delete(self.cc1.id) 
      67        shakespeare.dm.Material.delete(self.text.id) 
      68 
      69    def test1(self): 
      70        out1 = shakespeare.dm.Statistic.get(self.cc1.id) 
      71        assert self.text == out1.text 
      72        assert out1.occurrences == self.occurrences 
      73 
      74    def test_select(self): 
      75        tresults  = shakespeare.dm.Statistic.select( 
      76            sqlobject.AND( 
      77                shakespeare.dm.Statistic.q.textID == self.text.id, 
      78                shakespeare.dm.Statistic.q.word == self.word, 
      79                )) 
      80        num = tresults.count() 
      81        assert num == 1 
      82