Changeset 169
- Timestamp: 07/19/08 19:43:05 (1 month ago)
- Files:
- trunk/shakespeare.egg-info/paste_deploy_config.ini_tmpl (modified) (1 diff)
- trunk/shakespeare/__init__.py (modified) (1 diff)
- trunk/shakespeare/cli.py (modified) (1 diff)
- trunk/shakespeare/search.py (copied) (copied from milton/textsearch.py) (1 diff)
- trunk/shakespeare/tests/search_test.py (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/shakespeare.egg-info/paste_deploy_config.ini_tmpl
Revision 155 Revision 169 1 # 1 # 2 # shakespeare - Pylons configuration 2 # shakespeare - Pylons configuration 3 # 3 # 4 # The %(here)s variable will be replaced with the parent directory of this file 4 # The %(here)s variable will be replaced with the parent directory of this file 5 # 5 # 6 [DEFAULT] 6 [DEFAULT] 7 debug = true 7 debug = true 8 email_to = you@yourdomain.com 8 email_to = you@yourdomain.com 9 smtp_server = localhost 9 smtp_server = localhost 10 error_email_from = paste@localhost 10 error_email_from = paste@localhost 11 11 12 12 13 # Cache directory where cached copies of downloaded materials can be stored 13 # Cache directory where cached copies of downloaded materials can be stored 14 # 14 # 15 # This directory needs to be semi-permanent so do *not* put under a location 15 # This directory needs to be semi-permanent so do *not* put under a location 16 # such as /tmp. 16 # such as /tmp. 17 # 17 # 18 # At present should be different from the app's cache_dir 18 # At present should be different from the app's cache_dir 19 cachedir = ./cache 19 cachedir = cache 20 21 # Directory for Xapian search index 22 search_index_dir = searchindex 23 20 24 21 [server:main] 25 [server:main] 22 use = egg:Paste#http 26 use = egg:Paste#http 23 host = 0.0.0.0 27 host = 0.0.0.0 24 port = 5000 28 port = 5000 25 29 26 [app:main] 30 [app:main] 27 use = egg:shakespeare 31 use = egg:shakespeare 28 full_stack = true 32 full_stack = true 29 cache_dir = %(here)s/data 33 cache_dir = %(here)s/data 30 beaker.session.key = shakespeare 34 beaker.session.key = shakespeare 31 beaker.session.secret = ${app_instance_secret} 35 beaker.session.secret = ${app_instance_secret} 32 app_instance_uuid = ${app_instance_uuid} 36 app_instance_uuid = ${app_instance_uuid} 33 37 34 # If you'd like to fine-tune the individual locations of the cache data dirs 38 # If you'd like to fine-tune the individual locations of the cache data dirs 35 # for the Cache data, or the Session saves, un-comment the 
desired settings 39 # for the Cache data, or the Session saves, un-comment the desired settings 36 # here: 40 # here: 37 #beaker.cache.data_dir = %(here)s/data/cache 41 #beaker.cache.data_dir = %(here)s/data/cache 38 #beaker.session.data_dir = %(here)s/data/sessions 42 #beaker.session.data_dir = %(here)s/data/sessions 39 43 40 # WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* 44 # WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT* 41 # Debug mode will enable the interactive debugging tool, allowing ANYONE to 45 # Debug mode will enable the interactive debugging tool, allowing ANYONE to 42 # execute malicious code after an exception is raised. 46 # execute malicious code after an exception is raised. 43 set debug = false 47 set debug = false 44 48 45 # using sqlite in memory leads to thread issues when using db ... 49 # using sqlite in memory leads to thread issues when using db ... 46 # sqlobject.dburi = sqlite:///:memory: 50 # sqlobject.dburi = sqlite:///:memory: 47 sqlobject.dburi = postgres://<username>:<password>@localhost/<your-dbname> 51 sqlobject.dburi = postgres://<username>:<password>@localhost/<your-dbname> 48 52 49 # Logging configuration 53 # Logging configuration 50 [loggers] 54 [loggers] 51 keys = root 55 keys = root 52 56 53 [handlers] 57 [handlers] 54 keys = console 58 keys = console 55 59 56 [formatters] 60 [formatters] 57 keys = generic 61 keys = generic 58 62 59 [logger_root] 63 [logger_root] 60 level = INFO 64 level = INFO 61 handlers = console 65 handlers = console 62 66 63 [handler_console] 67 [handler_console] 64 class = StreamHandler 68 class = StreamHandler 65 args = (sys.stderr,) 69 args = (sys.stderr,) 66 level = NOTSET 70 level = NOTSET 67 formatter = generic 71 formatter = generic 68 72 69 [formatter_generic] 73 [formatter_generic] 70 format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s 74 format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s 71 75 72 76 73 [misc] 77 
[misc] 74 # directory where we can store all local copies of texts 78 # directory where we can store all local copies of texts 75 cachedir = ./cache 79 cachedir = ./cache 76 80 77 [db] 81 [db] 78 # sqlobject database uri. see sqlobject documentation for details 82 # sqlobject database uri. see sqlobject documentation for details 79 # uri = postgres://user:pass@host/dbname 83 # uri = postgres://user:pass@host/dbname 80 uri = sqlite:/:memory: 84 uri = sqlite:/:memory: 81 85 82 [web] 86 [web] 83 # directory where the templates used by web front end are kept 87 # directory where the templates used by web front end are kept 84 template_dir = ./src/shakespeare/template 88 template_dir = ./src/shakespeare/template 85 89 86 [annotater] 90 [annotater] 87 # url at which marginalia files (css/js etc) should be mounted 91 # url at which marginalia files (css/js etc) should be mounted 88 marginalia_prefix = /marginalia 92 marginalia_prefix = /marginalia trunk/shakespeare/__init__.py
Revision 155 Revision 169 1 ''' 1 ''' 2 Introduction 2 Introduction 3 ************ 3 ************ 4 4 5 The Open Shakespeare package provides a full open set of shakespeare's works 5 The Open Shakespeare package provides a full open set of shakespeare's works 6 (often in multiple versions) along with ancillary material, a variety of tools 6 (often in multiple versions) along with ancillary material, a variety of tools 7 and a python API. 7 and a python API. 8 8 9 Specifically in addition to the works themselves (often in multiple versions) 9 Specifically in addition to the works themselves (often in multiple versions) 10 there is an introduction, a chronology, explanatory notes, a concordance and 10 there is an introduction, a chronology, explanatory notes, a concordance and 11 search facilities. 11 search facilities. 12 12 13 All material is open source/open knowledge so that anyone can use, redistribute 13 All material is open source/open knowledge so that anyone can use, redistribute 14 and reuse these materials freely. For exact details of the license under which 14 and reuse these materials freely. For exact details of the license under which 15 this package is made available please see COPYING.txt. 15 this package is made available please see COPYING.txt. 16 16 17 Open Shakespeare has been developed under the aegis of the Open Knowledge 17 Open Shakespeare has been developed under the aegis of the Open Knowledge 18 Foundation (http://www.okfn.org/). 18 Foundation (http://www.okfn.org/). 19 19 20 Contact the Project 20 Contact the Project 21 ******************* 21 ******************* 22 22 23 Please mail info@okfn.org or join the okfn-discuss mailing list: 23 Please mail info@okfn.org or join the okfn-discuss mailing list: 24 24 25 http://lists.okfn.org/listinfo/okfn-discuss 25 http://lists.okfn.org/listinfo/okfn-discuss 26 26 27 27 28 Installation and Setup 28 Installation and Setup 29 ********************** 29 ********************** 30 30 31 1. 
Install the code 31 1. Install the code 32 =================== 32 =================== 33 33 34 1.1: (EITHER) Install using setup.py (preferred) 34 1.1: (EITHER) Install using setup.py (preferred) 35 ------------------------------------------------ 35 ------------------------------------------------ 36 36 37 Install ``shakespeare`` using easy_install:: 37 Install ``shakespeare`` using easy_install:: 38 38 39 easy_install shakespeare 39 easy_install shakespeare 40 40 41 NB: If you don't have easy_install you can get from here: 41 NB: If you don't have easy_install you can get from here: 42 42 43 <http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions> 43 <http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions> 44 44 45 45 46 1.2 (OR) Get the code straight from subversion 46 1.2 (OR) Get the code straight from subversion 47 ------------------------------------------------ 47 ------------------------------------------------ 48 48 49 1. Check out the subversion trunk:: 49 1. Check out the subversion trunk:: 50 50 51 svn co https://knowledgeforge.net/shakespeare/svn/trunk 51 svn co https://knowledgeforge.net/shakespeare/svn/trunk 52 52 53 2. Do:: 53 2. Do:: 54 54 55 sudo python setup.py develop 55 sudo python setup.py develop 56 56 57 57 58 2. Setup Package 58 Getting Started 59 ================ 59 *************** 60 60 61 Make a config file as follows:: 61 As a user: 62 ========== 62 63 63 paster make-config shakespeare config.ini 64 1. Basic setup 65 -------------- 66 67 To access most of the main features of Open Shakespeare you need a database. 68 For this an other bits and bobs of configuration you will need a configuration 69 file. 
70 71 You can make a config file as follows:: 72 73 paster make-config shakespeare {your-config.ini} 64 74 65 Tweak the config file as appropriate and then setup the application:: 75 Tweak the config file as appropriate and then setup the application:: 66 76 67 paster setup-app config.ini 77 paster setup-app config.ini 68 78 69 79 [TODO: this should be part of setup-app] 70 3. Initialize the system 71 ======================== 72 80 73 Run:: 81 Run:: 74 82 75 $ shakespeare-admin db create 83 $ shakespeare-admin db create 76 $ shakespeare-admin db init 84 $ shakespeare-admin db init 77 85 78 If you want to build the concordance do:: 86 2. Extras 87 --------- 79 88 80 $ shakespeare-admin concordance 89 1. Search index. [TODO] 81 90 82 NB: This may take some time to run so be patient. TIP: using sqlite building 91 2. You can start a web server to provide a easy-to-use web interface to the 83 the concordance really **does** seem to run forever so recommend using 92 shakespeare material and facilities by doing:: 84 postgresql or mysql if you are going to build the concordance. 85 86 87 Getting Started 88 *************** 89 90 As a user: 91 ========== 92 93 Start up the web interface by running the webserver:: 94 93 95 $ paster serve {your-config.ini} 94 $ paster serve {your-config.ini} 96 95 97 NB: {your-config.ini} should be replaced with the name of the config file you 96 NB: {your-config.ini} should be replaced with the name of the config file you 98 created earlier. 97 created earlier. 99 98 100 99 101 As a developer: 100 As a developer: 102 =============== 101 =============== 103 102 104 0. Copy development.ini.tmpl to development.ini and edit to your taste. 103 0. Setup 104 -------- 105 105 106 1. Check out the administrative commands: $ bin/shakespeare-admin help. 106 Follow the basic steps above put with an ini file named: development.ini 107 108 NB: you'll probably want to change log levels to debug. 109 110 1. 
Check out the administrative commands 111 ---------------------------------------- 112 113 $ bin/shakespeare-admin help. 107 114 108 2. Run the tests using either py.test of nosetests:: 115 2. Run the tests using either py.test of nosetests:: 116 ---------------------------------------------------- 109 117 110 $ nosetests shakespeare 118 $ nosetests shakespeare 111 ''' 119 ''' 112 __version__ = '0.5dev' 120 __version__ = '0.5dev' 113 __application_name__ = 'shakespeare' 121 __application_name__ = 'shakespeare' 114 122 115 def conf(): 123 def conf(): 116 import os 124 import os 117 defaultPath = os.path.abspath('./development.ini') 125 defaultPath = os.path.abspath('./development.ini') 118 envVarName = __application_name__.upper() + 'CONF' 126 envVarName = __application_name__.upper() + 'CONF' 119 confPath = os.environ.get(envVarName, defaultPath) 127 confPath = os.environ.get(envVarName, defaultPath) 120 if not os.path.exists(confPath): 128 if not os.path.exists(confPath): 121 raise ValueError('No Configuration file exists at: %s' % confPath) 129 raise ValueError('No Configuration file exists at: %s' % confPath) 122 130 123 # register the config 131 # register the config 124 import paste.deploy 132 import paste.deploy 125 import shakespeare.config.environment 133 import shakespeare.config.environment 126 pasteconf = paste.deploy.appconfig('config:' + confPath) 134 pasteconf = paste.deploy.appconfig('config:' + confPath) 127 135 128 shakespeare.config.environment.load_environment(pasteconf.global_conf, 136 shakespeare.config.environment.load_environment(pasteconf.global_conf, 129 pasteconf.local_conf) 137 pasteconf.local_conf) 130 from pylons import config 138 from pylons import config 131 conf = config 139 conf = config 132 140 133 # import ConfigParser 141 # import ConfigParser 134 # conf = ConfigParser.SafeConfigParser() 142 # conf = ConfigParser.SafeConfigParser() 135 # conf.read(confPath) 143 # conf.read(confPath) 136 144 137 return conf 145 return conf 138 146 
trunk/shakespeare/cli.py
Revision 155 Revision 169 1 #!/usr/bin/env python 1 #!/usr/bin/env python 2 2 3 import cmd 3 import cmd 4 import os 4 import os 5 import StringIO 5 import StringIO 6 6 7 class ShakespeareAdmin(cmd.Cmd): 7 class ShakespeareAdmin(cmd.Cmd): 8 """ 8 """ 9 TODO: self.verbose option and associated self._print 9 TODO: self.verbose option and associated self._print 10 """ 10 """ 11 12 def __init__(self, verbose=False): 13 # cmd.Cmd is not a new style class 14 cmd.Cmd.__init__(self) 15 self.verbose = verbose 11 16 12 prompt = 'The Bard > ' 17 prompt = 'The Bard > ' 13 18 14 def run_interactive(self, line=None): 19 def run_interactive(self, line=None): 15 """Run an interactive session. 20 """Run an interactive session. 16 """ 21 """ 17 print 'Welcome to shakespeare-admin interactive mode\n' 22 print 'Welcome to shakespeare-admin interactive mode\n' 18 self.do_about() 23 self.do_about() 19 print 'Type: "?" or "help" for help on commands.\n' 24 print 'Type: "?" or "help" for help on commands.\n' 20 while 1: 25 while 1: 21 try: 26 try: 22 self.cmdloop() 27 self.cmdloop() 23 break 28 break 24 except KeyboardInterrupt: 29 except KeyboardInterrupt: 25 raise 30 raise 26 31 27 def do_help(self, line=None): 32 def do_help(self, line=None): 28 cmd.Cmd.do_help(self, line) 33 cmd.Cmd.do_help(self, line) 29 34 30 def do_about(self, line=None): 35 def do_about(self, line=None): 31 import shakespeare 36 import shakespeare 32 version = shakespeare.__version__ 37 version = shakespeare.__version__ 33 about = \ 38 about = \ 34 '''Open Shakespeare version %s. Copyright the Open Knowledge Foundation. 39 '''Open Shakespeare version %s. Copyright the Open Knowledge Foundation. 35 Open Shakespeare is open-knowledge and open-source. See COPYING for details. 40 Open Shakespeare is open-knowledge and open-source. See COPYING for details. 36 41 37 For more information about the package run `info`. 42 For more information about the package run `info`. 
38 ''' % version 43 ''' % version 39 print about 44 print about 40 45 41 def do_quit(self, line=None): 46 def do_quit(self, line=None): 42 sys.exit() 47 sys.exit() 43 48 44 def do_EOF(self, *args): 49 def do_EOF(self, *args): 45 print '' 50 print '' 46 sys.exit() 51 sys.exit() 47 52 48 # ================= 53 # ================= 49 # Commands 54 # Commands 50 55 51 def do_db(self, line=None): 56 def do_db(self, line=None): 52 actions = [ 'create', 'clean', 'rebuild', 'init' ] 57 actions = [ 'create', 'clean', 'rebuild', 'init' ] 53 if line is None or line not in actions: 58 if line is None or line not in actions: 54 self.help_db() 59 self.help_db() 55 return 1 60 return 1 56 import shakespeare.model 61 import shakespeare.model 57 if line == 'init': 62 if line == 'init': 58 import pkg_resources 63 import pkg_resources 59 pkg = 'shksprdata' 64 pkg = 'shksprdata' 60 meta = pkg_resources.resource_stream(pkg, 'texts/metadata.txt') 65 meta = pkg_resources.resource_stream(pkg, 'texts/metadata.txt') 61 shakespeare.model.Material.load_from_metadata(meta) 66 shakespeare.model.Material.load_from_metadata(meta) 62 else: 67 else: 63 shakespeare.model.__dict__[line+'db']() 68 shakespeare.model.__dict__[line+'db']() 64 69 65 def help_db(self, line=None): 70 def help_db(self, line=None): 66 usage = \ 71 usage = \ 67 '''db { create | clean | rebuild | init } 72 '''db { create | clean | rebuild | init } 68 ''' 73 ''' 69 print usage 74 print usage 70 75 71 def do_gutenberg(self, line=None): 76 def do_gutenberg(self, line=None): 72 import shakespeare.gutenberg 77 import shakespeare.gutenberg 73 helper = shakespeare.gutenberg.Helper(verbose=True) 78 helper = shakespeare.gutenberg.Helper(verbose=True) 74 if not line: 79 if not line: 75 helper.execute() 80 helper.execute() 76 elif line == 'print_index': 81 elif line == 'print_index': 77 import pprint 82 import pprint 78 pprint.pprint(helper.get_index()) 83 pprint.pprint(helper.get_index()) 79 else: 84 else: 80 msg = 'Unknown argument %s' 
% line 85 msg = 'Unknown argument %s' % line 81 raise Exception(msg) 86 raise Exception(msg) 82 87 83 def help_gutenberg(self, line=None): 88 def help_gutenberg(self, line=None): 84 usage = \ 89 usage = \ 85 """ 90 """ 86 Download and process all Project Gutenberg shakespeare texts""" 91 Download and process all Project Gutenberg shakespeare texts""" 87 print usage 92 print usage 88 93 89 def do_moby(self, line=None): 94 def do_moby(self, line=None): 90 import shakespeare.moby 95 import shakespeare.moby 91 helper = shakespeare.moby.Helper(verbose=True) 96 helper = shakespeare.moby.Helper(verbose=True) 92 if not line: 97 if not line: 93 helper.execute() 98 helper.execute() 94 elif line == 'print_index': 99 elif line == 'print_index': 95 import pprint 100 import pprint 96 pprint.pprint(helper.get_index()) 101 pprint.pprint(helper.get_index()) 97 else: 102 else: 98 msg = 'Unknown argument %s' % line 103 msg = 'Unknown argument %s' % line 99 raise Exception(msg) 104 raise Exception(msg) 100 105 101 def help_moby(self, line=None): 106 def help_moby(self, line=None): 102 usage = \ 107 usage = \ 103 ''' 108 ''' 104 Download and process all Moby/Bosak shakespeare texts''' 109 Download and process all Moby/Bosak shakespeare texts''' 105 print usage 110 print usage 106 111 107 def _init_index(self): 112 def _init_index(self): 108 import shakespeare.index 113 import shakespeare.index 109 self._index = shakespeare.index.all 114 self._index = shakespeare.index.all 110 115 111 def _filter_index(self, line): 116 def _filter_index(self, line): 112 """Filter items in index return only those whose id (url) is in line 117 """Filter items in index return only those whose id (url) is in line 113 If line is empty or None return all items 118 If line is empty or None return all items 114 """ 119 """ 115 if line: 120 if line: 116 textsToAdd = [] 121 textsToAdd = [] 117 textNames = line.split() 122 textNames = line.split() 118 for item in self._index: 123 for item in self._index: 119 if 
item.name in textNames: 124 if item.name in textNames: 120 textsToAdd.append(item) 125 textsToAdd.append(item) 121 return textsToAdd 126 return textsToAdd 122 else: 127 else: 123 self._init_index() 128 self._init_index() 124 return self._index 129 return self._index 125 130 126 def do_index(self, line): 131 def do_index(self, line): 127 self._init_index() 132 self._init_index() 128 header = \ 133 header = \ 129 ''' +-------------------+ 134 ''' +-------------------+ 130 | Index of Material | 135 | Index of Material | 131 +-------------------+ 136 +-------------------+ 132 137 133 ''' 138 ''' 134 print header 139 print header 135 for row in self._index: 140 for row in self._index: 136 print row.name.ljust(35), row.title 141 print row.name.ljust(35), row.title 137 142 138 def help_index(self, line=None): 143 def help_index(self, line=None): 139 usage = \ 144 usage = \ 140 '''Print index of Shakespeare texts to stdout''' 145 '''Print index of Shakespeare texts to stdout''' 141 print usage 146 print usage 142 147 143 def do_concordance(self, line=None): 148 def do_concordance(self, line=None): 144 self._init_index() 149 self._init_index() 145 print 'Making concordance (this may take some time ...):' 150 print 'Making concordance (this may take some time ...):' 146 from shakespeare.concordance import ConcordanceBuilder 151 from shakespeare.concordance import ConcordanceBuilder 147 import time 152 import time 148 start = end = 0 153 start = end = 0 149 start = time.time() 154 start = time.time() 150 cc = ConcordanceBuilder() 155 cc = ConcordanceBuilder() 151 textsToAdd = [] 156 textsToAdd = [] 152 if line is not None: 157 if line is not None: 153 textsToAdd = self._filter_index(line) 158 textsToAdd = self._filter_index(line) 154 else: 159 else: 155 def gut_non_folio(material): 160 def gut_non_folio(material): 156 return '_gut' in material.name and 'gut_f' not in material.name 161 return '_gut' in material.name and 'gut_f' not in material.name 157 textsToAdd = 
filter(gut_non_folio, self._index) 162 textsToAdd = filter(gut_non_folio, self._index) 158 for item in textsToAdd: 163 for item in textsToAdd: 159 print 'Adding: %s (%s)' % (item.name, item.title) 164 print 'Adding: %s (%s)' % (item.name, item.title) 160 cc.add_text(item.name) 165 cc.add_text(item.name) 161 end = time.time() 166 end = time.time() 162 timetaken = end - start 167 timetaken = end - start 163 print 'Finished. Time taken was %ss' % timetaken 168 print 'Finished. Time taken was %ss' % timetaken 164 169 165 def help_concordance(self, line=None): 170 def help_concordance(self, line=None): 166 usage = \ 171 usage = \ 167 '''Create a concordance 172 '''Create a concordance 168 173 169 If no arguments supplied then use all non-folio gutenberg shakespeare texts. 174 If no arguments supplied then use all non-folio gutenberg shakespeare texts. 170 Otherwise arguments should be a space seperated list of work name ids 175 Otherwise arguments should be a space seperated list of work name ids 171 ''' 176 ''' 172 print usage 177 print usage 173 178 174 def do_runserver(self, line=None): 179 def do_runserver(self, line=None): 175 self.help_runserver() 180 self.help_runserver() 176 181 177 def help_runserver(self, line=None): 182 def help_runserver(self, line=None): 178 usage = \ 183 usage = \ 179 '''This command has been DEPRECATED. 184 '''This command has been DEPRECATED. 
180 185 181 Please use `paster serve` to run a server now, e.g.:: 186 Please use `paster serve` to run a server now, e.g.:: 182 187 183 paster serve <my-config.ini> 188 paster serve <my-config.ini> 184 ''' 189 ''' 185 print usage 190 print usage 186 191 187 def do_info(self, line=None): 192 def do_info(self, line=None): 188 import shakespeare 193 import shakespeare 189 info = shakespeare.__doc__ 194 info = shakespeare.__doc__ 190 print 195 print 191 print ' ## Open Shakespeare ##' 196 print ' ## Open Shakespeare ##' 192 print info 197 print info 193 198 194 def help_info(self, line=None): 199 def help_info(self, line=None): 195 print 'Information about this package.' 200 print 'Information about this package.' 196 201 202 def do_search_add(self, line=None): 203 path = line.strip() 204 if not os.path.exists(path): 205 print '"%s" is not an existent path' % path 206 return 1 207 if os.path.isdir(path): 208 fns = os.listdir(path) 209 fns = filter(lambda x: x.endswith('.txt'), fns) 210 works = [ os.path.join(path, fn) for fn in fns ] 211 else: 212 works = [ path ] 213 import shakespeare.search 214 index = shakespeare.search.SearchIndex.default_index() 215 for work in works: 216 if self.verbose: 217 print 'Processing %s' % work 218 fileobj = open(work) 219 index.add_item(fileobj) 220 221 def help_search_add(self, line=None): 222 info = '''search_add {path} 223 224 Add contents of {path} (file itself or all text files in directory if 225 directory) to the search index.''' 226 print info 227 228 def do_search_add_all(self): 229 # TODO: automatically add all texts listed in index 230 pass 231 232 def do_search(self, line=None): 233 import shakespeare.search 234 index = shakespeare.search.SearchIndex.default_index() 235 query = line.strip() 236 if not query: 237 print 'No search term supplied.' 238 return 1 239 matches = index.search(query) 240 print "%i results found." 
% matches.get_matches_estimated() 241 print "Results 1-%i:" % matches.size() 242 243 for m in matches: 244 print 245 print '%i: %i%% docid=%i' % (m.rank + 1, m.percent, m.docid) 246 print m.document.get_data() 247 248 def help_search(self, line=None): 249 info = 'Supply a query with which to search the search index.' 250 print info 251 197 def main(): 252 def main(): 198 import optparse 253 import optparse 199 usage = \ 254 usage = \ 200 '''%prog [options] <command> 255 '''%prog [options] <command> 201 256 202 Run about or help for details.''' 257 Run about or help for details.''' 203 parser = optparse.OptionParser(usage) 258 parser = optparse.OptionParser(usage) 204 parser.add_option('-v', '--verbose', dest='verbose', help='Be verbose', 259 parser.add_option('-v', '--verbose', dest='verbose', help='Be verbose', 205 action='store_true', default=False) 260 action='store_true', default=False) 206 options, args = parser.parse_args() 261 options, args = parser.parse_args() 207 262 208 if len(args) == 0: 263 if len(args) == 0: 209 parser.print_help() 264 parser.print_help() 210 return 1 265 return 1 211 else: 266 else: 212 cmd = ShakespeareAdmin( )267 cmd = ShakespeareAdmin(verbose=options.verbose) 213 args = ' '.join(args) 268 args = ' '.join(args) 214 args = args.replace('-','_') 269 args = args.replace('-','_') 215 cmd.onecmd(args) 270 cmd.onecmd(args) 216 271 trunk/shakespeare/search.py
Revision 165 Revision 169 1 # !/usr/bin/env python1 # Support for indexing and searching texts using xapian 2 2 import os 3 3 4 import xapian 4 import xapian 5 5 6 if len(sys.argv) < 2:6 class SearchIndex(object): 7 print >> sys.stderr, "Missing a search term" % sys.argv[0]7 def __init__(self, index_dir): 8 sys.exit(1)8 self.index_dir = index_dir 9 9 10 try: 10 @classmethod 11 # Open the database for searching. 11 def config_index_dir(self): 12 database = xapian.Database('./index') 12 '''Get the search index directory specified in the config.''' 13 import shakespeare 14 conf = shakespeare.conf() 15 index_dir = conf['search_index_dir'] 16 return index_dir 13 17 14 # Start an enquire session. 18 @classmethod 15 enquire = xapian.Enquire(database) 19 def default_index(self): 20 '''Return a SearchIndex instance initialized with the path specified in 21 the configuration file. 22 ''' 23 index_dir = self.config_index_dir() 24 if not os.path.exists(index_dir): 25 os.makedirs(index_dir) 26 return SearchIndex(index_dir) 16 27 17 # Take the search argument and turn into a Xapian query 28 def add_item(self, fileobj): 18 query_string = sys.argv[1] 29 # TODO: remove this comment as no longer relevant (?) 19 for arg in sys.argv[2:]: 30 #create the folder for a writable db: alter path 20 query_string += ' ' 31 document = xapian.WritableDatabase (self.index_dir, xapian.DB_CREATE_OR_OPEN) 21 query_string += arg 32 indexer = xapian.TermGenerator() 33 stemmer = xapian.Stem("english") 34 indexer.set_stemmer(stemmer) 35 36 para = '' 37 try: 38 for line in fileobj: 39 line = line.strip() 40 if line == '': 41 if para != '': 42 doc = xapian.Document() 43 doc.set_data(para) 44 45 indexer.set_document(doc) 46 indexer.index_text(para) 47 48 # Add the document to the database. 49 document.add_document(doc) 50 para = '' 51 else: 52 if para != '': 53 para += ' ' 54 para += line 55 except StopIteration: 56 # TODO: what is happening here? 
57 pass 58 print Stopped 59 60 def search(self, query_string): 61 # Open the database for searching. 62 database = xapian.Database(self.index_dir) 63 64 # Start an enquire session. 65 enquire = xapian.Enquire(database) 22 66 23 # Parse the query string to produce a Xapian::Query object. 67 # Parse the query string to produce a Xapian::Query object. 24 qp = xapian.QueryParser()68 qp = xapian.QueryParser() 25 stemmer = xapian.Stem("english")69 stemmer = xapian.Stem("english") 26 qp.set_stemmer(stemmer)70 qp.set_stemmer(stemmer) 27 qp.set_database(database)71 qp.set_database(database) 28 qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)72 qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME) 29 query = qp.parse_query(query_string)73 query = qp.parse_query(query_string) 30 print "Parsed query is: %s" % query.get_description()74 print "Parsed query is: %s" % query.get_description() 31 75 32 # Find the top 10 results for the query. 76 # Find the top 10 results for the query. 33 enquire.set_query(query) 77 enquire.set_query(query) 34 matches = enquire.get_mset(0, 10) 78 matches = enquire.get_mset(0, 10) 79 return matches 35 80 81 @classmethod 82 def print_matches(self, matches): 83 # Display the results. 84 print "%i results found." % matches.get_matches_estimated() 85 print "Results 1-%i:" % matches.size() 36 86 37 # Display the results. 87 for m in matches: 38 print "%i results found." % matches.get_matches_estimated() 88 print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()) 39 print "Results 1-%i:" % matches.size() 40 89 41 for m in matches:42 print "%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data())43 44 except Exception, e:45 print >> sys.stderr, "Exception: %s" % str(e)46 sys.exit(1)47
