[Zope-CVS] CVS: Products/ZCTextIndex/tests - mailtest.py:1.1.2.14

Jeremy Hylton jeremy@zope.com
Fri, 3 May 2002 16:09:07 -0400


Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv8631

Modified Files:
      Tag: TextIndexDS9-branch
	mailtest.py 
Log Message:
Add -x and -t options:
    -x -- exclude the message text from the data.fs
    -t NNN -- commit a transaction every NNN messages (default: 1)

-x makes a big difference in data size (the message text is not stored)
-t makes a big difference in indexing speed (fewer, larger transactions)


=== Products/ZCTextIndex/tests/mailtest.py 1.1.2.13 => 1.1.2.14 ===
     -p NNN -- pack <data.fs> every NNN messages (default: 500), and at end
     -p 0 -- don't pack at all
-    -b NNN -- return the NNN best matches (default is 10)
+    -b NNN -- return the NNN best matches (default: 10)
+    -x -- exclude the message text from the data.fs
+    -t NNN -- commit a transaction every NNN messages (default: 1)
 
 The script either indexes or queries depending on whether -q or -i is
 passed as an option.
@@ -67,7 +69,8 @@
     pack_time = 0
 
     rt["index"] = idx = ZCTextIndex("text")
-    rt["documents"] = docs = IOBTree()
+    if not EXCLUDE_TEXT:
+        rt["documents"] = docs = IOBTree()
     get_transaction().commit()
 
     mbox = mailbox.UnixMailbox(open(mboxfile))
@@ -86,12 +89,15 @@
             print "indexing msg", i
         i0 = time.clock()
         idx.index_object(i, msg)
-        docs[i] = msg
-        get_transaction().commit()
+        if not EXCLUDE_TEXT:
+            docs[i] = msg
+        if i % TXN_SIZE == 0:
+            get_transaction().commit()
         i1 = time.clock()
         idx_time += i1 - i0
         if VERBOSE and i % 50 == 0:
             print i, "messages indexed"
+            print "cache size", db.cacheSize()
         if PACK_INTERVAL and i % PACK_INTERVAL == 0:
             if VERBOSE >= 2:
                 print "packing..."
@@ -102,6 +108,8 @@
                 print "pack took %s sec" % (p1 - p0)
             pack_time += p1 - p0
 
+    get_transaction().commit()
+
     if PACK_INTERVAL and i % PACK_INTERVAL != 0:
         if VERBOSE >= 2:
             print "packing one last time..."
@@ -141,7 +149,7 @@
 
 def main(fs_path, mbox_path, query_str):
     f = ZODB.FileStorage.FileStorage(fs_path)
-    db = ZODB.DB(f)
+    db = ZODB.DB(f, cache_size=CACHE_SIZE)
     cn = db.open()
     rt = cn.root()
 
@@ -161,10 +169,13 @@
     BEST = 10
     VERBOSE = 0
     PACK_INTERVAL = 500
+    EXCLUDE_TEXT = 0
+    CACHE_SIZE = 10000
+    TXN_SIZE = 1
     query_str = None
     mbox_path = None
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:')
+        opts, args = getopt.getopt(sys.argv[1:], 'vn:p:i:q:b:xt:')
     except getopt.error, msg:
         usage(msg)
     if len(args) != 1:
@@ -182,5 +193,9 @@
             mbox_path = v
         elif o == '-b':
             BEST = int(v)
+        elif o == '-x':
+            EXCLUDE_TEXT = 1
+        elif o == '-t':
+            TXN_SIZE = int(v)
     fs_path, = args
     main(fs_path, mbox_path, query_str)