[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.13
Jeremy Hylton
jeremy@zope.com
Wed, 1 May 2002 19:15:45 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv9229/tests
Modified Files:
Tag: TextIndexDS9-branch
mailtest.py
Log Message:
More features for mailtest.py.
-b NNN -- return the NNN best matches (default is 10)
If a query is run in verbose mode, print out 5 lines of context after
the docid and score.
Add the message subject and author to the text of the message. This
change makes it easier to run tests and get a rough sense for whether
the ranking makes sense.
=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.12 => 1.1.2.13 ===
-p NNN -- pack <data.fs> every NNN messages (default: 500), and at end
-p 0 -- don't pack at all
+ -b NNN -- return the NNN best matches (default is 10)
The script either indexes or queries depending on whether -q or -i is
passed as an option.
@@ -51,7 +52,13 @@
total_bytes = 0
def __init__(self, msg):
- self.text = msg.fp.read()
+ subject = msg.getheader('subject', '')
+ author = msg.getheader('from', '')
+ if author:
+ summary = "%s (%s)\n" % (subject, author)
+ else:
+ summary = "%s\n" % subject
+ self.text = summary + msg.fp.read()
Message.total_bytes += len(self.text)
def index(rt, mboxfile, db):
@@ -113,9 +120,24 @@
def query(rt, query_str):
idx = rt["index"]
- results = idx.query(query_str)
- for r in results.items():
- print r
+ docs = rt["documents"]
+ results = idx.query(query_str, BEST)
+ print "query:", query_str
+ print "# results:", len(results)
+ for docid, score in results:
+ print "docid %4d score %2d" % (docid, score)
+ if VERBOSE:
+ msg = docs[docid]
+ # print the first CONTEXT (5) lines of the message as context
+ CONTEXT = 5
+ ctx = msg.text.split("\n", CONTEXT)
+ del ctx[-1]
+ print "-" * 60
+ print "message:"
+ for l in ctx:
+ print l
+ print "-" * 60
+
def main(fs_path, mbox_path, query_str):
f = ZODB.FileStorage.FileStorage(fs_path)
@@ -136,12 +158,13 @@
import getopt
NUM = 0
+ BEST = 10
VERBOSE = 0
PACK_INTERVAL = 500
query_str = None
mbox_path = None
try:
- opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:')
+ opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:')
except getopt.error, msg:
usage(msg)
if len(args) != 1:
@@ -157,5 +180,7 @@
query_str = v
elif o == '-i':
mbox_path = v
+ elif o == '-b':
+ BEST = int(v)
fs_path, = args
main(fs_path, mbox_path, query_str)