[Zope-CVS] CVS: Products/ZCTextIndex/tests - mhindex.py:1.9
Guido van Rossum
guido@python.org
Thu, 23 May 2002 00:35:54 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv22125
Modified Files:
mhindex.py
Log Message:
Add usage message and -h option.
Add -w and -W option to dump the word list (by word and by wid,
respectively).
Except KeyboardInterrupt from unqualified except clauses.
=== Products/ZCTextIndex/tests/mhindex.py 1.8 => 1.9 ===
-"""MH mail indexer."""
+"""MH mail indexer.
+
+To index messages from a single folder (messages defaults to 'all'):
+ mhindex.py [options] -u +folder [messages ...]
+
+To bulk index all messages from several folders:
+ mhindex.py [options] -b folder ...
+
+To execute a single query:
+ mhindex.py [options] query
+
+To enter interactive query mode:
+ mhindex.py [options]
+
+Common options:
+ -d FILE -- specify the Data.fs to use (default ~/.Data.fs)
+ -w -- dump the word list in alphabetical order and exit
+ -W -- dump the word list ordered by word id and exit
+
+Indexing options:
+ -O -- do a prescan on the data to compute optimal word id assignments;
+ this is only useful the first time the Data.fs is used
+ -t N -- commit a transaction after every N messages (default 20000)
+ -p N -- pack after every N commits (by default no packing is done)
+
+Querying options:
+ -m N -- show at most N matching lines from the message (default 3)
+ -n N -- show the N best matching messages (default 3)
+"""
import os
import re
@@ -36,10 +64,11 @@
def main():
try:
- opts, args = getopt.getopt(sys.argv[1:], "bd:m:n:Op:t:u")
+ opts, args = getopt.getopt(sys.argv[1:], "bd:hm:n:Op:t:uwW")
except getopt.error, msg:
print msg
- sys.exit(2)
+ print "use -h for help"
+ return 2
update = 0
bulk = 0
optimize = 0
@@ -48,11 +77,15 @@
datafs = os.path.expanduser(DATAFS)
pack = 0
trans = 20000
+ dumpwords = dumpwids = 0
for o, a in opts:
if o == "-b":
bulk = 1
if o == "-d":
datafs = a
+ if o == "-h":
+ print __doc__
+ return
if o == "-m":
maxlines = int(a)
if o == "-n":
@@ -65,7 +98,17 @@
trans = ont(a)
if o == "-u":
update = 1
+ if o == "-w":
+ dumpwords = 1
+ if o == "-W":
+ dumpwids = 1
ix = Indexer(datafs, writable=update or bulk, trans=trans, pack=pack)
+ if dumpwords:
+ ix.dumpwords()
+ if dumpwids:
+ ix.dumpwids()
+ if dumpwords or dumpwids:
+ return
if bulk:
if optimize:
ix.optimize(args)
@@ -127,6 +170,17 @@
self.maxdocid = 0
print len(self.docpaths), "Document ids"
print len(self.path2docid), "Pathnames"
+ print self.index.lexicon.length(), "Words"
+
+ def dumpwids(self):
+ lexicon = self.index.lexicon
+ for wid in lexicon.wids():
+ print "%10d %s" % (wid, lexicon.get_word(wid))
+
+ def dumpwords(self):
+ lexicon = self.index.lexicon
+ for word in lexicon.words():
+ print "%10d %s" % (lexicon.get_wid(word), word)
def close(self):
self.root = None
@@ -162,6 +216,8 @@
continue
try:
results, n = self.timequery(text, top + nbest)
+ except KeyboardInterrupt:
+ raise
except:
reportexc()
text = ""
@@ -367,6 +423,8 @@
self.getheaders(m, L)
try:
self.getmsgparts(m, L, 0)
+ except KeyboardInterrupt:
+ raise
except:
print "(getmsgparts failed:)"
reportexc()
@@ -471,4 +529,4 @@
traceback.print_exc()
if __name__ == "__main__":
- main()
+ sys.exit(main())