[Zope-Checkins] CVS: Zope2 - testCatalog.py:1.1.4.4
Andreas Jung
andreas@yetix.digicool.com
Thu, 8 Mar 2001 07:14:29 -0500
Update of /mnt/cvs-repository/Zope2/lib/python/Products/ZCatalog/tests
In directory yetix:/work/Zope2/Catalog-BTrees-Integration/lib/python/Products/ZCatalog/tests
Modified Files:
Tag: Catalog-BTrees-Integration
testCatalog.py
Log Message:
minor changes
--- Updated File testCatalog.py in package test --
--- testCatalog.py 2001/03/05 15:28:52 1.1.4.3
+++ testCatalog.py 2001/03/08 12:14:27 1.1.4.4
@@ -7,59 +7,25 @@
Andreas Jung, andreas@digicool.com
$Log$
- Revision 1.1.4.3 2001/03/05 15:28:52 andreas
- update
-
- Revision 1.1.2.16 2001/03/05 15:14:51 andreas
- - minor changes in testing catalog/uncatalogObject
- - tests must now be started in the lib/python directory
- - older input sets are no longer valid (must be recreated)
-
- Revision 1.1.2.15 2001/03/02 17:03:03 andreas
- changed default settings
-
- Revision 1.1.2.14 2001/03/02 15:16:47 andreas
- version for release
-
- Revision 1.1.2.13 2001/03/02 00:41:33 andreas
- SHould now be a "final" version
-
- Revision 1.1.2.12 2001/03/01 23:46:16 andreas
- complete thread handling rewrite
-
- Revision 1.1.2.11 2001/03/01 18:35:50 andreas
- simple tests are now doing benchmarks
-
- Revision 1.1.2.10 2001/02/28 20:23:23 andreas
+ Revision 1.1.4.4 2001/03/08 12:14:27 andreas
minor changes
- Revision 1.1.2.9 2001/02/28 18:39:20 andreas
- misc changes
+ Revision 1.1.2.20 2001/03/07 14:58:40 andreas
+ *** empty log message ***
- Revision 1.1.2.8 2001/02/28 16:51:32 andreas
- added benchmarks
+ Revision 1.1.2.19 2001/03/07 14:07:51 andreas
+ Code cleanup
- Revision 1.1.2.7 2001/02/28 16:02:15 andreas
- fixed bug in generation of keywords index
+ Revision 1.1.2.18 2001/03/07 12:46:32 andreas
+ added advanced tests
- Revision 1.1.2.6 2001/02/28 15:31:19 andreas
- updated tests
+ Revision 1.1.2.17 2001/03/07 10:28:27 andreas
+ reworked version now using the new thread dispatcher
- Revision 1.1.2.5 2001/02/27 21:06:18 andreas
- minor changes
-
- Revision 1.1.2.4 2001/02/27 20:43:08 andreas
- added -d option
-
- Revision 1.1.2.3 2001/02/27 20:26:24 andreas
- added prelimary stress test
-
- Revision 1.1.2.2 2001/02/27 19:33:55 andreas
- detabbed version
-
- Revision 1.1.2.1 2001/02/27 19:27:58 andreas
- first lame version
-
+ Revision 1.1.2.16 2001/03/05 15:14:51 andreas
+ - minor changes in testing catalog/uncatalogObject
+ - tests must now be started in the lib/python directory
+ - older input sets are no longer valid (must be recreated)
"""
@@ -79,16 +45,14 @@
import Zope
import ZODB, ZODB.FileStorage
from Products.ZCatalog import Catalog,Vocabulary
-from SearchIndex.UnIndex import UnIndex
-from SearchIndex.UnTextIndex import UnTextIndex
-from SearchIndex.UnKeywordIndex import UnKeywordIndex
-from SearchIndex.Lexicon import Lexicon, stop_word_dict
import Persistence
import ExtensionClass
+from Testing import dispatcher
+import keywords
from zLOG import LOG
-import getopt,whrandom,thread,time,string
-from unittest import TestCase, TestSuite, TextTestRunner
+import getopt,whrandom,thread,time,string,mailbox,rfc822
+from Testing.unittest import TestCase, TestSuite, TextTestRunner
# maximum number of files to read for the test suite
@@ -97,53 +61,23 @@
# maximum number of threads for stress tests
numThreads = 4
-# directory where we can find some stuff to index
-testdataDir = "/work/testdata"
-# dictionary with test words
-dictFile = "/usr/share/dict/words"
-
# number of iterations for searches
searchIterations = 1000
# number of iterations for catalog/uncatalog operations
updateIterations = 100
+# input mailbox file
+mbox = "/usr/home/andreas/zope.mbox"
+mbox2 = "/usr/home/andreas/python.mbox"
+
#
# Don't change anything below
#
-def myLOG(*args):
- args = map(str,args)
- LOG('catalog',0,'bench', string.join(args , ' '))
- open('bench.log','a').write( string.join(args," ") + "\n")
-
-class Timer:
-
- def __init__(self,name=''):
- self.name = name
- self.start()
-
- def start(self):
- self.ts = time.time()
-
- def end(self):
- self.te = time.time()
- if thread.get_ident() == mainThreadID:
- myLOG('bench THMain ' , self.__repr__())
- else:
- myLOG('bench TH%-6s' % thread.get_ident(),self.__repr__())
-
-
- def __repr__(self):
- return "%-60s: %8.3f sec" % (self.name,self.te-self.ts)
-
- def __str__(self):
- return self.__repr__()
-
-
class testZODB:
""" some wrapper stuff around ZODB """
@@ -173,61 +107,142 @@
class testCatalog(Persistence.Persistent,TestCase):
""" Wrapper around the catalog stuff """
- def __init__(self,dname):
- self.files = []
- self.dname = dname
- os.path.walk(dname,self.walkf,())
+ def __init__(self,mboxname):
+ self.msg_ids = []
self.num_files = 0
+ self.keywords = []
self._vocabulary = Vocabulary.Vocabulary('Vocabulary','Vocabulary', globbing=1)
self._catalog = Catalog.Catalog()
+ self._catalog.addIndex('to', 'TextIndex')
+ self._catalog.addIndex('sender', 'TextIndex')
+ self._catalog.addIndex('subject', 'TextIndex')
self._catalog.addIndex('content', 'TextIndex')
self._catalog.addIndex('file_id', 'TextIndex')
self._catalog.addColumn('file_id')
- self._catalog.addIndex('length', 'FieldIndex')
- self._catalog.addIndex('modtime', 'FieldIndex')
+ self._catalog.addIndex('length', 'FieldIndex')
+ self._catalog.addColumn('length')
+ self._catalog.addIndex('date', 'FieldIndex')
self._catalog.addIndex('keywords', "KeywordIndex")
-
- for i in range(len(self.files)):
- f = self.files[i]
- self.catFile( f )
- print i,'/',len(self.files),f
+
+ self.build_catalog(mboxname)
+
+
+ def build_catalog(self,mboxname):
+
+ mb = mailbox.UnixMailbox(open(mboxname,"r"))
+ i = 0
+
+ msg = mb.next()
+ while msg and self.num_files<maxFiles:
+ self.catMessage(msg)
+ self.msg_ids.append(msg.dict["message-id"])
+
+ msg = mb.next()
self.num_files = self.num_files + 1
+ if self.num_files % 100==0: print self.num_files
+
+ sub = string.split(msg.dict["subject"])
+ for s in sub:
+ if not s in self.keywords: self.keywords.append(s)
self._catalog.aq_parent = None
-
- def catFile(self,f):
- self._catalog.catalogObject( testFile(f) , f)
+
+ def catMessage(self,m):
+ print m.dict["message-id"]
+ self._catalog.catalogObject( testMessage(m) , m.dict["message-id"] )
- def uncatFile(self,uid):
+ def uncatMessage(self,uid):
self._catalog.uncatalogObject( uid )
- def walkf(self,arg,dirname,names):
- """ used to collect all files inside a file hierarchy """
- for n in names:
- if len(self.files) < maxFiles:
- if os.path.isfile(os.path.join(dirname,n)): self.files.append(os.path.join(dirname,n))
-
-
-class testFile(ExtensionClass.Base):
+class testMessage(ExtensionClass.Base):
+
+ def __init__(self,msg):
- def __init__(self,fname):
- self.content = open(fname,'r').read()
- self.file_id = fname
- self.length = os.stat(fname)[6]
- self.modtime = os.stat(fname)[8]
- self.keywords = filter(lambda x: x!="",string.split(fname , "/")) # Hack !!!
+ self.sender = msg.dict.get("from","")
+ self.subject = msg.dict.get("subject","")
+ self.to = msg.dict.get("to","")
+ self.content = str(msg)
+ self.keywords= string.split(self.subject , " ")
+
+ self.file_id = msg.dict.get("message-id","")
+
+ self.length = len(str(msg))
+ date = msg.dict.get("date","")
+ try:
+ self.date = time.mktime(rfc822.parsedate(date)[:9])
+ except: pass
def __del__(self):
- self.content = self.file_id = None
+ pass
+
+class BuildEnv(dispatcher.Dispatcher,TestCase):
+ """ build environment """
+
+ def __init__(self,func):
+
+ TestCase.__init__(self,func)
+ dispatcher.Dispatcher.__init__(self)
+
+ self.init_phase = 0
+
+ self.setlog( open("dispatcher.log","a") )
+ self.logn('treads=%d searchiterations=%d' % (numThreads,searchIterations))
+ self.logn('updateiterations=%d maxfiles=%d' % (updateIterations,maxFiles))
+
+ #############################################################
+ # Build up ZODB
+ #############################################################
+
+ def buildTestEnvironment(self,*args):
+ self.init_phase = 1
+ self.dispatcher("funcTestEnvironment",("funcTestEnvironment",1,(),{}))
+
+
+ def funcTestEnvironment(self,*args):
+
+ env = self.th_setup()
+
+ if not os.path.exists(dataDir): os.makedirs(dataDir)
+ os.system("rm -f %s/*" % dataDir)
+ zodb = testZODB("%s/Data_orig.fs" % dataDir)
+
+ print "parsing and reading mailbox file %s....please wait" % mbox
+ tc = testCatalog( mbox )
+
+ print "writing Catalog to ZODB"
+ zodb.write("catalog" , tc)
+
+ print "Creating keywords file"
+ kw = keywords.Keywords()
+ kw.build(mbox,1000)
+
+
+ print tc.num_files, "files read"
+ print "Initalization complete"
+
+ self.th_teardown(env)
+
-class testSearches(TestCase):
+class testSearches(dispatcher.Dispatcher,TestCase):
""" test searches """
+ def __init__(self,func,*args,**kw):
+
+ TestCase.__init__(self,func,args,kw)
+ dispatcher.Dispatcher.__init__(self)
+
+ self.init_phase = 0
+
+ self.setlog( open("dispatcher.log","a") )
+ self.logn('treads=%d searchiterations=%d' % (numThreads,searchIterations))
+ self.logn('updateiterations=%d maxfiles=%d' % (updateIterations,maxFiles))
+
+
def setUp(self):
os.system("rm -fr data/work")
if not os.path.exists("data/work"): os.makedirs("data/work")
@@ -238,147 +253,192 @@
self.threads = {}
self.conflicts = {}
-
+ kw = keywords.Keywords()
+ kw.reload()
+ self.keywords = kw.keywords()
+
+ self.logn("-" * 80)
+ self.log_zodb_size("before")
+
+
def tearDown(self):
+ self.log_zodb_size("after")
del self.zodb
self.zodb = self.catalog = None
+
+ def log_zodb_size(self,s):
+ self.logn("Size of ZODB (data/work/Data.fs) %s test : %s" % (s,self.size2size(os.stat("data/work/Data.fs")[6])) )
+
+
+ def size2size(self,n):
+ import math
+ if n <1024.0: return "%8.3lf Bytes" % n
+ if n <1024.0*1024.0: return "%8.3lf KB" % (1.0*n/1024.0)
+ if n <1024.0*1024.0*1024.0: return "%8.3lf MB" % (1.0*n/1024.0/1024.0)
+
+
+ #############################################################
+ # Fulltext test
+ #############################################################
+
+
+ def testFulltextIndex(self,args,kw):
+ """ benchmark FulltextIndex """
+ self.dispatcher('funcFulltextIndex' , ('funcFulltextIndex', kw["numThreads"] , () , {} ) )
+
+
+ def funcFulltextIndex(self,*args):
+ """ benchmark FulltextIndex """
- def testFieldIndex(self,*args):
+ cat,msg_ids = self.get_catalog()
+
+ env = self.th_setup()
+
+ for kw in self.keywords:
+ res = cat.searchResults( {"content" : kw } )
+
+ self.th_teardown(env)
+
+
+ #############################################################
+ # Field index test
+ #############################################################
+
+ def testFieldIndex(self,args,kw):
+ """ benchmark field index"""
+ self.dispatcher('funcFieldIndex' , ('funcFieldIndex',kw["numThreads"] , () , {} ) )
+
+
+ def funcFieldIndex(self,*args):
""" benchmark FieldIndex """
- cat,files = self.get_catalog()
+ cat,msg_ids = self.get_catalog()
- T = Timer('testFieldIndex')
+ env = self.th_setup()
for i in range(0,searchIterations):
res = cat.searchResults( {"length" : i } )
for r in res:
- assert i==os.stat(r.file_id)[6] , "%s should have size %d but is %s" % (r.file_id,i,os.stat(r.file_id)[6])
+ assert i==r.length , "%s should have size %d but is %s" % (r.file_id,i,r.length)
- T.end()
-
- self.threads[thread.get_ident()] = 1
+ self.th_teardown(env)
-
- def testFieldRangeIndex(self,*args):
+ #############################################################
+ # Keyword index test
+ #############################################################
+
+ def testKeywordIndex(self,args,kw):
+ """ benchmark Keyword index"""
+ self.dispatcher('funcKeywordIndex' , ('funcKeywordIndex', kw["numThreads"] , () , {} ) )
+
+
+ def funcKeywordIndex(self,*args):
+ """ benchmark KeywordIndex """
+
+ cat,msg_ids = self.get_catalog()
+
+ env = self.th_setup()
+
+ for kw in self.keywords:
+ res = cat.searchResults( {"subject" : kw } )
+# assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
+
+ self.th_teardown(env)
+
+ #############################################################
+ # Field range index test
+ #############################################################
+
+ def testFieldRangeIndex(self,args,kw):
+ """ benchmark field range index"""
+ self.dispatcher('funcFieldRangeIndex' , ('funcFieldRangeIndex', kw["numThreads"] , () , {} ) )
+
+
+ def funcFieldRangeIndex(self,*args):
""" benchmark FieldRangeIndex """
- cat,files = self.get_catalog()
+ cat,msg_ids = self.get_catalog()
+ env = self.th_setup()
+
rg = []
for i in range(searchIterations):
m = whrandom.randint(0,10000)
n = m + 200
rg.append(m,n)
- T = Timer('testFieldRangeIndex')
results = []
for i in range(searchIterations):
results.append( cat.searchResults( {"length" : rg[i],"length_usage" : "range:min:max" } ))
- T.end()
-
for i in range(searchIterations):
for r in results[i]:
- size = os.stat(r.file_id)[6]
+ size = r.length
assert rg[i][0]<=size and size<=rg[i][1] , "Filesize of %s is out of range (%d,%d)" % (r.file_id,rg[i][0],rg[i][1])
+ self.th_teardown(env)
- self.threads[thread.get_ident()] = 1
- def testKeywordIndex(self,*args):
- """ benchmark KeywordIndex """
-
- cat,files = self.get_catalog()
-
- # Setup a list of all possible keywords
- keywords = []
- for f in cat.files:
- for kw in filter(lambda x: x!="",string.split(f, "/")):
- if len(keywords)<searchIterations and not kw in keywords: keywords.append(kw)
-
- T = Timer('testKeywordIndex')
-
- for kw in keywords:
- res = cat.searchResults( {"keywords" : kw } )
- assert len(res) != 0 , "Search result for keyword '%s' is empty" % kw
-
- T.end()
- self.threads[thread.get_ident()] = 1
+ #############################################################
+ # Keyword + range index test
+ #############################################################
+ def testKeywordRangeIndex(self,args,kw):
+ """ benchmark Keyword range index"""
+ self.dispatcher('funcKeywordRangeIndex' , ('funcKeywordRangeIndex', kw["numThreads"] , () , {} ) )
- def testKeywordRangeIndex(self,*args):
+ def funcKeywordRangeIndex(self,*args):
""" benchmark Keyword & IndexRange search """
- cat,files = self.get_catalog()
+ cat,msg_ids = self.get_catalog()
- # Setup a list of all possible keywords
- keywords = []
- for f in cat.files:
- for kw in filter(lambda x: x!="",string.split(f, "/")):
- if not kw in keywords: keywords.append(kw)
-
rg = []
- for i in range(searchIterations):
+ for i in range(len(self.keywords)):
m = whrandom.randint(0,10000)
n = m + 200
rg.append(m,n)
- T = Timer("testKeywordRangeSearch")
+ env = self.th_setup()
results = []
- for i in range(searchIterations):
- results.append( cat.searchResults( {"keywords":kw[whrandom.randint(0,len(kw)-1)], "length" : rg[i],"length_usage" : "range:min:max" } ))
+ for i in range(len(self.keywords)):
+ results.append( cat.searchResults( {"keywords":self.keywords[i], "length" : rg[i],"length_usage" : "range:min:max" } ))
+ self.th_teardown(env)
- T.end()
- self.threads[thread.get_ident()] = 1
-
-
- def testFulltextIndex(self,*args):
- """ benchmark FulltextIndex """
+ #############################################################
+ # Test full reindexing
+ #############################################################
- cat,files = self.get_catalog()
+ def testUpdates(self,args,kw):
+ """ test reindexing of existing data """
+ self.dispatcher("testUpdates" , ("funcUpdates",4 , () , {} ))
- words = open(dictFile).readlines()
- words = map(lambda x: x[:-1], words)
- ct=[]
- for i in range(searchIterations):
- ct.append( words[whrandom.randint(0,len(words)-1)])
-
- T = Timer('testFulltextIndex')
- for i in range(searchIterations):
- res = cat.searchResults( {"content" : ct[i] } )
-
- T.end()
- self.threads[thread.get_ident()] = 1
-
-
- def testUpdates(self,*args):
+ def funcUpdates(self,*args):
""" benchmark catalog/uncatalog operations """
conflicts = 0
- cat,files = self.get_catalog()
+ cat,msg_ids = self.get_catalog()
- T = Timer('testUpdates of objects (100 iterations)')
+ env = self.th_setup()
+
for i in range(updateIterations):
- r = whrandom.randint(0,len(files)-1)
- f = files[r]
+ r = whrandom.randint(0,len(msg_ids)-1)
try:
- cat.uncatFile(f)
- cat.catFile(f)
+ cat.uncatMessage(msg_ids[r])
+ cat.catalogObject("This test sucks",r)
if i%10 ==0: get_transaction().commit()
except ZODB.POSException.ConflictError:
-# print sys.exc_type,sys.exc_value
+ print sys.exc_type,sys.exc_value
conflicts = conflicts + 1
try:
@@ -386,94 +446,94 @@
except:
conflicts = conflicts + 1
- T.end()
- self.conflicts[thread.get_ident()] = conflicts
- self.threads[thread.get_ident()] = 1
+ self.th_teardown(env,conflicts=conflicts)
+ #############################################################
+ # Test full reindexing
+ #############################################################
- def get_catalog(self):
- """ return a catalog object """
+ def testReindexing(self,args,kw):
+ """ test reindexing of existing data """
+ self.dispatcher("testReindexing" , ("funcReindexing",1 , (mbox,1000) , {} ))
- # depended we are running in multithreaded mode we must take
- # care how threads open the ZODB
- if thread.get_ident()==mainThreadID:
- cat = self.catalog._catalog
- files = self.catalog.files
- else:
- connection = self.zodb.db.open()
- root = connection.root()
- cat = root["catalog"]._catalog
- files = root['catalog'].files
+ def funcReindexing(self,mbox,numfiles=100):
+ """ test reindexing of existing data """
- return cat,files
+ conflicts = 0
+ cat,msg_ids = self.get_catalog()
+ env = self.th_setup()
- def testSpeed(self,num):
- """ wrapper to start multiple threads of the test functions """
+ mb = mailbox.UnixMailbox(open(mbox,"r"))
+ i = 0
- self.threads = {}
- self.conflicts = {}
+ msg = mb.next()
+ while msg and i<numfiles:
- if num==1: f = self.testFulltextIndex
- elif num==2: f = self.testKeywordIndex
- elif num==3: f = self.testFieldIndex
- elif num==4: f = self.testFieldRangeIndex
- elif num==5: f = self.testKeywordRangeIndex
- elif num==6: f = self.testUpdates
+ obj = testMessage(msg)
+ mid = msg.dict["message-id"]
- self.zodb.db.close()
- self.zodb = testZODB('data/work/Data.fs',open=0)
+ try:
+ cat.catalogObject(obj,mid)
+ get_transaction().commit()
+ except:
+ conflicts = conflicts + 1
- for i in range(numThreads):
- t = thread.start_new_thread(f,(None,))
+ msg = mb.next()
+ i = i+1
+ if i%100==0: print i
- while len(self.threads) != numThreads: time.sleep(1)
+ env = self.th_teardown(env,conflicts=conflicts)
- if num==6:
- for k,v in self.conflicts.items():
- myLOG('Conflicts TH%d : %d' % (k,v) )
+ #############################################################
+ # Test full reindexing
+ #############################################################
+
+ def testIncrementalIndexing(self,args,kw):
+ """ testing incremental indexing """
+ self.dispatcher("testIncrementalIndexing" , ("funcReindexing",1, (mbox2,1000) , {}))
- def testSpeed1(self):
- """ thread benchmark FulltextIndex """
- self.testSpeed(1)
- def testSpeed2(self):
- """ thread benchmark KeywordIndex """
- self.testSpeed(2)
+ def get_catalog(self):
+ """ return a catalog object """
- def testSpeed3(self):
- """ thread benchmark FieldIndex """
- self.testSpeed(3)
+ # depended we are running in multithreaded mode we must take
+ # care how threads open the ZODB
- def testSpeed4(self):
- """ thread benchmark FieldRangeIndex """
- self.testSpeed(4)
+ if thread.get_ident()==mainThreadID:
+ cat = self.catalog._catalog
+ msg_ids = self.catalog.msg_ids
+ else:
+ connection = self.zodb.db.open()
+ root = connection.root()
+ cat = root["catalog"]._catalog
+ msg_ids = root['catalog'].msg_ids
- def testSpeed5(self):
- """ thread benchmark Keyword & RangeIndex """
- self.testSpeed(5)
+ return cat,msg_ids
- def testSpeed6(self):
- """ thread benchmark catalog/uncatalog operations"""
- self.testSpeed(6)
+
+
def usage(program):
print "Usage: "
print
print "initalize the test catalog: %s -i -f <maximum number files to use> [-d <data directory>] " % program
- print "to run the tests: %s -t -f <maximum number files to use> [-n <number of threads>]" % program
+ print "to run the basic tests: %s -b -f <maximum number files to use> [-n <number of threads>]" % program
+ print "to run the advanced tests: %s -a -f <maximum number files to use> [-n <number of threads>]" % program
if __name__ == '__main__':
# sys.setcheckinterval(-1)
+
+ mainThreadID = thread.get_ident()
- opts,args = getopt.getopt(sys.argv[1:],"hitd:n:f:",['help'])
+ opts,args = getopt.getopt(sys.argv[1:],"hiabn:f:",['help'])
opts.sort()
optsLst = map(lambda x: x[0],opts)
@@ -482,7 +542,6 @@
for k,v in opts:
if k in ['-h','--help'] : usage(os.path.basename(sys.argv[0])); sys.exit(0)
- if k == "-d": testdataDir = v
if k == "-n": numThreads = string.atoi(v)
if k == "-f": maxFiles = string.atoi(v)
@@ -490,64 +549,48 @@
if '-i' in optsLst:
- if not os.path.exists(dataDir): os.makedirs(dataDir)
-
- print "Initalizing ZODB"
- os.system("rm -f %s/*" % dataDir)
- zodb = testZODB("%s/Data_orig.fs" % dataDir)
-
- print "parsing and reading testdata....please wait (%s)" % testdataDir
- tc = testCatalog( testdataDir )
-
- print "writing Catalog to ZODB"
- zodb.write("catalog" , tc)
-
- print tc.num_files, "files read"
-
- print "Initalization complete"
-
- sys.exit(0)
-
-
- if '-t' in optsLst:
-
- mainThreadID = thread.get_ident()
-
- myLOG('-'*80)
- myLOG('treads=%d searchiterations=%d' % (numThreads,searchIterations))
- myLOG('updateiterations=%d maxfiles=%d' % (updateIterations,maxFiles))
-
- s_tests = [
- testSearches("testFulltextIndex"),
- testSearches("testKeywordIndex"),
- testSearches("testFieldIndex"),
- testSearches("testFieldRangeIndex"),
- testSearches("testKeywordRangeIndex"),
- testSearches("testUpdates")
- ]
-
- m_tests = [
- testSearches("testSpeed1"),
- testSearches("testSpeed2"),
- testSearches("testSpeed3"),
- testSearches("testSpeed4"),
- testSearches("testSpeed5"),
- testSearches("testSpeed6"),
- ]
+ tests = [ BuildEnv("buildTestEnvironment") ]
- print "Original size of ZODB"
- os.system("ls -la %s/Data_*" % dataDir)
+ testsuite = TestSuite()
+ for x in tests: testsuite.addTest(x)
- testsuite1 = TestSuite()
- for x in s_tests: testsuite1.addTest(x)
+ runner = TextTestRunner()
+ runner.run(testsuite)
- testsuite2 = TestSuite()
- for x in m_tests: testsuite2.addTest(x)
-
- runner = TextTestRunner()
- runner.run(testsuite1)
- runner.run(testsuite2)
+ sys.exit(0)
+
+
+ if '-b' in optsLst:
- print "size of modified ZODB"
- os.system("ls -la data/work/Data.*")
+ basic_tests = [
+ testSearches("testFulltextIndex",numThreads=1),
+ testSearches("testFulltextIndex",numThreads= 4),
+ testSearches("testFieldIndex",numThreads= 1),
+ testSearches("testFieldIndex",numThreads= 4),
+ testSearches("testFieldRangeIndex",numThread= 1),
+ testSearches("testFieldRangeIndex",numThreads= 4),
+ testSearches("testKeywordIndex",numThreads= 1),
+ testSearches("testKeywordIndex",numThreads= 4),
+ testSearches("testKeywordRangeIndex",numThreads= 1),
+ testSearches("testKeywordRangeIndex",numThreads=4)
+ ]
+
+ testsuite1 = TestSuite()
+ for x in basic_tests: testsuite1.addTest(x)
+
+ runner = TextTestRunner()
+ runner.run(testsuite1)
+
+ if '-a' in optsLst:
+
+ basic_tests = [
+ testSearches("testUpdates",(),{"numThreads" : 4}),
+ testSearches("testReindexing",(),{"numThreads" : 1}),
+ testSearches("testIncrementalIndexing",(),{"numThreads" : 1})
+ ]
+ testsuite1 = TestSuite()
+ for x in basic_tests: testsuite1.addTest(x)
+
+ runner = TextTestRunner()
+ runner.run(testsuite1)
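
For reference, the rewritten suite appears to be driven through the new -i/-b/-a
options handled by getopt above (the old -t/-d options are gone); a rough sketch of
typical invocations, with the file count and thread count chosen only for illustration:

    python testCatalog.py -i -f 1000         # build the test ZODB and catalog from the mailbox
    python testCatalog.py -b -n 4 -f 1000    # run the basic index benchmarks
    python testCatalog.py -a -f 1000         # run the advanced update/reindexing tests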