[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.6
Jeremy Hylton
jeremy@zope.com
Wed, 1 May 2002 13:43:56 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv6428
Modified Files:
Tag: TextIndexDS9-branch
mailtest.py
Log Message:
mailtest is growing a regular Swiss Army knife of features.
-m mailbox: index the mailbox
-q query: execute the query
Store the documents in the database, too, so we can print out useful
results one day.
Update the doc strings.
=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.5 => 1.1.2.6 ===
-usage: python mailtest.py [options] <mailbox> <data.fs>
+usage: python mailtest.py [options] <data.fs>
options:
-v verbose
-n NNN -- max number of messages to read from mailbox
+ -q query
+ -m mailbox
-The script reads mail messages from the mailbox and indexes them. It
-indexes one message at a time, then commits the transaction.
+The script either indexes or queries depending on whether -q or -m is
+passed as an option.
-To interact with the index after it is completed, you can simply load
-the index from the database:
+For -m mailbox, the script reads mail messages from the mailbox and
+indexes them. It indexes one message at a time, then commits the
+transaction.
+
+For -q query, it performs a query on an existing index.
+
+If both are specified, the indexing is performed first.
+
+You can also interact with the index after it is completed. Load the
+index from the database:
import ZODB
from ZODB.FileStorage import FileStorage
@@ -23,6 +33,7 @@
import ZODB
import ZODB.FileStorage
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
+from BTrees.IOBTree import IOBTree
import sys
import mailbox
@@ -38,28 +49,21 @@
total_bytes = 0
def __init__(self, msg):
- self.msg = msg
-
- def text(self):
- buf = self.msg.fp.read()
- Message.total_bytes += len(buf)
- return buf
+ self.text = msg.fp.read()
+ Message.total_bytes += len(self.text)
-def main(inp, out):
+def index(rt, mboxfile):
global NUM
idx_time = 0
pack_time = 0
- f = ZODB.FileStorage.FileStorage(out)
- db = ZODB.DB(f)
- cn = db.open()
- rt = cn.root()
rt["index"] = idx = ZCTextIndex("text")
+ rt["documents"] = docs = IOBTree()
get_transaction().commit()
- mbox = mailbox.UnixMailbox(open(inp))
+ mbox = mailbox.UnixMailbox(open(mboxfile))
if VERBOSE:
- print "opened", inp
+ print "opened", mboxfile
if not NUM:
NUM = sys.maxint
i = 0
@@ -71,6 +75,7 @@
msg = Message(_msg)
i0 = time.clock()
idx.index_object(i, msg)
+ docs[i] = msg
get_transaction().commit()
i1 = time.clock()
idx_time += i1 - i0
@@ -80,10 +85,37 @@
p0 = time.clock()
db.pack(time.time())
p1 = time.clock()
- print "pack took %s sec" % (p1 - p0)
+ if VERBOSE:
+ print "pack took %s sec" % (p1 - p0)
pack_time += p1 - p0
- return idx_time, pack_time
+ if VERBOSE:
+ print "Index time", idx_time
+ print "Index bytes", Message.total_bytes
+ rate = (Message.total_bytes / idx_time) / 1024
+ print "Index rate %d KB/sec" % int(rate)
+
+def query(rt, query_str):
+ idx = rt["index"]
+ results = idx.query(query_str)
+ print results
+ for r in results.items():
+ print r
+
+def main(fs_path, mbox_path, query_str):
+ f = ZODB.FileStorage.FileStorage(fs_path)
+ db = ZODB.DB(f)
+ cn = db.open()
+ rt = cn.root()
+
+ if mbox_path is not None:
+ index(rt, mbox_path)
+ if query is not None:
+ query(rt, query_str)
+
+ cn.close()
+ db.close()
+ f.close()
if __name__ == "__main__":
import getopt
@@ -91,12 +123,14 @@
NUM = 0
VERBOSE = 0
PACK_INTERVAL = 500
+ query_str = None
+ mbox_path = None
try:
- opts, args = getopt.getopt(sys.argv[1:], 'vn:p:')
+ opts, args = getopt.getopt(sys.argv[1:], 'vn:p:m:q:')
except getopt.error, msg:
usage(msg)
- if len(args) != 2:
- usage("exactly 2 filename arguments required")
+ if len(args) != 1:
+ usage("exactly 1 filename argument required")
for o, v in opts:
if o == '-n':
NUM = int(v)
@@ -104,7 +138,10 @@
VERBOSE += 1
elif o == '-p':
PACK_INTERVAL = int(v)
- inp, out = args
- ti, tp = main(inp, out)
- print "Index time", ti
- print "Index bytes", Message.total_bytes
+ elif o == '-q':
+ query_str = v
+ elif o == '-m':
+ mbox_path = v
+ fs_path, = args
+ print "main"
+ main(fs_path, mbox_path, query_str)