[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - interactiveDemo.py:1.1.2.2
Andreas Jung
andreas@digicool.com
Wed, 13 Feb 2002 15:51:37 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv1703
Modified Files:
Tag: ajung-textindexng-branch
interactiveDemo.py
Log Message:
code cleanup
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/interactiveDemo.py 1.1.2.1 => 1.1.2.2 ===
from Products.PluginIndexes.TextIndex import TextIndex
from Products.ZCatalog import Catalog
-import os, sys, re,traceback, atexit
+import os, sys, re,traceback, atexit, getopt
import time
import readline
@@ -15,9 +15,6 @@
except IOError: pass
atexit.register(readline.write_history_file,histfile)
-datadir = '/work/html//doc/python-2.2/lib'
-datadir = '/export/html//doc/ZopeBook'
-
class extra: pass
@@ -26,73 +23,103 @@
def __init__(self,txt,path=''):
self.text = txt
self.path = path
-
-ex = extra()
-ex.useSplitter='ZopeSplitter'
-#ex.useStemmer='porter'
-ex.useOperator='and'
-ex.lexicon = None
-ex.useGlobbing=1
-#ex.useProximity='soundex'
-#ex.nearStorage = 'documentLookup'
-ex.nearStorage = 'internal'
+def index_directory(dirname, verbose, timed):
-CAT = Catalog.Catalog("cat")
-CAT.aq_parent = TO('aq_parent')
+ if not dirname: raise RuntimeError,'no directory name'
-TI = TextIndexNG.TextIndexNG('text',ex,caller = CAT)
-CAT.addIndex('text',TI)
+ ex = extra()
+ ex.useSplitter = 'ZopeSplitter'
+ ex.splitterCasefolding = 1
+ ex.useStemmer = None
+ ex.useOperator = 'and'
+ ex.lexicon = None
+ ex.useGlobbing = 0
+ ex.nearDistance = 5
+ ex.useSimilarity = 0
+ ex.stopWords = None
-t1 = TO ('this text is a text')
-t2 = TO ('the quick brown fox jumps over the lazy dog because the dog is quick and jumps quick')
+ CAT = Catalog.Catalog("cat")
+ CAT.aq_parent = TO('aq_parent')
-CAT.catalogObject(t1 , 't1')
-CAT.catalogObject(t1 , 't2')
+ TI = TextIndexNG.TextIndexNG('text',ex, caller = CAT)
+ if verbose: TI.debugOn()
+ else: TI.debugOff()
+ TI.timed_statistics = timed
-files = os.listdir(datadir)
-files.sort()
+ CAT.addIndex('text',TI)
+ CAT.addColumn('text')
+ CAT.addColumn('path')
-ts = time.time()
-bytes = 0
-print '-'*78
+ t1 = TO ('this text is a text')
+ t2 = TO ('the quick brown fox jumps over the lazy dog because the dog is quick and jumps quick')
-for i in range(len(files)):
- f = files[i]
- print >>sys.stderr,f
- fname = os.path.join(datadir,f)
- bytes+=os.stat(fname)[6]
- if not os.path.isfile(fname): continue
- data = open(fname).read()
+ CAT.catalogObject(t1 , 't1')
+ CAT.catalogObject(t1 , 't2')
- T = TO(data,fname)
- CAT.catalogObject(T,fname)
+ files = os.listdir(dirname)
+ files.sort()
-print "%d files, total size: %d" % (len(files), bytes)
-print "Indexing time: %5.3lf" % (time.time() - ts)
-for x in dir(ex):
- print "%25s = %s" % (x,getattr(ex,x))
+ ts = time.time()
+ bytes = 0
+ print '-'*78
+ for i in range(len(files)):
+ f = files[i]
+ print >>sys.stderr,f
+ fname = os.path.join(dirname,f)
+ bytes+=os.stat(fname)[6]
+ if not os.path.isfile(fname): continue
+ data = open(fname).read()
+ T = TO(data,fname)
+ CAT.catalogObject(T,fname)
+ print "%d files, total size: %d" % (len(files), bytes)
+ print "Indexing time: %5.3lf" % (time.time() - ts)
+ for x in dir(ex):
+ print "%25s = %s" % (x,getattr(ex,x))
-while 1:
+ return CAT
- line = raw_input("> ")
-
-
- try:
- res = CAT.searchResults(text={'query':line})
+def interactive_mode(CAT):
- print "Result:"
+ while 1:
- for r in res:
- rid = r.getRID()
- print CAT.paths[rid]
- print r.text,r.path
+ line = raw_input("> ")
+ try:
+ res = CAT.searchResults(text={'query':line})
+
+ print "Result: %d matches" % len(res)
+
+ for i in range(len(res)):
+ r = res[i]
+ rid = r.getRID()
+ print "%-2d %s" % (i, r.path)
+
+
+ except:
+ traceback.print_exc()
+
+
+if __name__== '__main__':
+
+ opts,args = getopt.getopt(sys.argv[1:],'hd:',['help','directory=',\
+ 'verbose','timed'])
+
+ directory = None
+ verbose = 0
+ timed = 0
+
+ for k,v in opts:
+ if k in ['-h','--help']: usage(); sys.exit(1)
+ if k in ['-d','--directory']: directory = v
+ if k in ['--verbose']: verbose = 1
+ if k in ['--timed']: timed = 1
- except:
- traceback.print_exc()
+ cat = index_directory(directory, verbose, timed)
+ interactive_mode(cat)
+