[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.14
Jeremy Hylton
jeremy@zope.com
Fri, 3 May 2002 16:09:07 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv8631
Modified Files:
Tag: TextIndexDS9-branch
mailtest.py
Log Message:
Add -x and -t options:
-x -- exclude the message text from the data.fs
-t NNN -- commit a transaction every NNN messages (default: 1)
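-x makes a big difference in data size (the message text is not stored in the data.fs)
-t makes a big difference in indexing speed (with NNN > 1, a commit happens only every NNN messages)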
=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.13 => 1.1.2.14 ===
-p NNN -- pack <data.fs> every NNN messages (default: 500), and at end
-p 0 -- don't pack at all
- -b NNN -- return the NNN best matches (default is 10)
+ -b NNN -- return the NNN best matches (default: 10)
+ -x -- exclude the message text from the data.fs
+ -t NNN -- commit a transaction every NNN messages (default: 1)
The script either indexes or queries depending on whether -q or -i is
passed as an option.
@@ -67,7 +69,8 @@
pack_time = 0
rt["index"] = idx = ZCTextIndex("text")
- rt["documents"] = docs = IOBTree()
+ if not EXCLUDE_TEXT:
+ rt["documents"] = docs = IOBTree()
get_transaction().commit()
mbox = mailbox.UnixMailbox(open(mboxfile))
@@ -86,12 +89,15 @@
print "indexing msg", i
i0 = time.clock()
idx.index_object(i, msg)
- docs[i] = msg
- get_transaction().commit()
+ if not EXCLUDE_TEXT:
+ docs[i] = msg
+ if i % TXN_SIZE == 0:
+ get_transaction().commit()
i1 = time.clock()
idx_time += i1 - i0
if VERBOSE and i % 50 == 0:
print i, "messages indexed"
+ print "cache size", db.cacheSize()
if PACK_INTERVAL and i % PACK_INTERVAL == 0:
if VERBOSE >= 2:
print "packing..."
@@ -102,6 +108,8 @@
print "pack took %s sec" % (p1 - p0)
pack_time += p1 - p0
+ get_transaction().commit()
+
if PACK_INTERVAL and i % PACK_INTERVAL != 0:
if VERBOSE >= 2:
print "packing one last time..."
@@ -141,7 +149,7 @@
def main(fs_path, mbox_path, query_str):
f = ZODB.FileStorage.FileStorage(fs_path)
- db = ZODB.DB(f)
+ db = ZODB.DB(f, cache_size=CACHE_SIZE)
cn = db.open()
rt = cn.root()
@@ -161,10 +169,13 @@
BEST = 10
VERBOSE = 0
PACK_INTERVAL = 500
+ EXCLUDE_TEXT = 0
+ CACHE_SIZE = 10000
+ TXN_SIZE = 1
query_str = None
mbox_path = None
try:
- opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:')
+ opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:xt:')
except getopt.error, msg:
usage(msg)
if len(args) != 1:
@@ -182,5 +193,9 @@
mbox_path = v
elif o == '-b':
BEST = int(v)
+ elif o == '-x':
+ EXCLUDE_TEXT = 1
+ elif o == '-t':
+ TXN_SIZE = int(v)
fs_path, = args
main(fs_path, mbox_path, query_str)