[Zope3-checkins] CVS: Zope3/lib/python/Zope/TextIndex/tests - mhindex.py:1.2
Guido van Rossum
guido@python.org
Tue, 3 Dec 2002 16:28:32 -0500
Update of /cvs-repository/Zope3/lib/python/Zope/TextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv31862
Modified Files:
mhindex.py
Log Message:
Changes needed to work with Zope3.
Use TextIndexWrapper instead of our own class.
Fix a bug in unindexing removed messages (it was iterating over a lazy
list that was being modified as we went).
=== Zope3/lib/python/Zope/TextIndex/tests/mhindex.py 1.1 => 1.2 ===
--- Zope3/lib/python/Zope/TextIndex/tests/mhindex.py:1.1 Tue Dec 3 09:28:16 2002
+++ Zope3/lib/python/Zope/TextIndex/tests/mhindex.py Tue Dec 3 16:28:32 2002
@@ -1,4 +1,4 @@
-#! /usr/bin/env python2.1
+#! /usr/bin/env python2.2
"""MH mail indexer.
@@ -40,17 +40,19 @@
from StringIO import StringIO
from stat import ST_MTIME
-DATAFS = "~/.Data.fs"
-ZOPECODE = "~/projects/Zope/lib/python"
+DATAFS = "~/.mhindex.fs"
+ZOPECODE = "~/projects/Zope3/lib/python"
-sys.path.append(os.path.expanduser(ZOPECODE))
+zopecode = os.path.expanduser(ZOPECODE)
+sys.path.insert(0, zopecode)
-from ZODB import DB
+from ZODB.DB import DB
from ZODB.FileStorage import FileStorage
+from Transaction import get_transaction
from Persistence import Persistent
-from BTrees.IOBTree import IOBTree
-from BTrees.OIBTree import OIBTree
-from BTrees.IIBTree import IIBTree
+from Persistence.BTrees.IOBTree import IOBTree
+from Persistence.BTrees.OIBTree import OIBTree
+from Persistence.BTrees.IIBTree import IIBTree
from Zope.TextIndex.NBest import NBest
from Zope.TextIndex.OkapiIndex import OkapiIndex
@@ -58,6 +60,7 @@
from Zope.TextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Zope.TextIndex.QueryParser import QueryParser
from Zope.TextIndex.StopDict import get_stopdict
+from Zope.TextIndex.TextIndexWrapper import TextIndexWrapper
NBEST = 3
MAXLINES = 3
@@ -151,7 +154,7 @@
try:
self.index = self.root["index"]
except KeyError:
- self.index = self.root["index"] = TextIndex()
+ self.index = self.root["index"] = TextIndexWrapper()
try:
self.docpaths = self.root["docpaths"]
except KeyError:
@@ -305,7 +308,7 @@
def timequery(self, text, nbest):
t0 = time.time()
c0 = time.clock()
- results, n = self.index.query(text, nbest)
+ results, n = self.index.query(text, 0, nbest)
t1 = time.time()
c1 = time.clock()
print "[Query time: %.3f real, %.3f user]" % (t1-t0, c1-c0)
@@ -320,11 +323,10 @@
prog = re.compile(pattern, re.IGNORECASE)
print '='*70
rank = lo
- qw = self.index.query_weight(text)
for docid, score in results[lo:hi]:
rank += 1
path = self.docpaths[docid]
- score = 100.0*score/qw
+ score *= 100.0
print "Rank: %d Score: %d%% File: %s" % (rank, score, path)
path = os.path.join(self.mh.getpath(), path)
try:
@@ -465,10 +467,10 @@
self.unindexpath(path)
continue
print "indexing", docid, path
- self.index.index_text(docid, text)
+ self.index.index_doc(docid, text)
self.maycommit()
# Remove messages from the folder that no longer exist
- for path in self.path2docid.keys(f.name):
+ for path in list(self.path2docid.keys(f.name)):
if not path.startswith(f.name + "/"):
break
if self.getmtime(path) == 0:
@@ -483,7 +485,7 @@
del self.doctimes[docid]
del self.path2docid[path]
try:
- self.index.unindex(docid)
+ self.index.unindex_doc(docid)
except KeyError, msg:
print "KeyError", msg
self.maycommit()
@@ -565,37 +567,6 @@
print "packing..."
self.database.pack()
self.pack_count = 0
-
-class TextIndex(Persistent):
-
- def __init__(self):
- self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
- self.index = OkapiIndex(self.lexicon)
-
- def index_text(self, docid, text):
- self.index.index_doc(docid, text)
- self._p_changed = 1 # XXX
-
- def unindex(self, docid):
- self.index.unindex_doc(docid)
- self._p_changed = 1 # XXX
-
- def query(self, query, nbest=10):
- # returns a total hit count and a mapping from docids to scores
- parser = QueryParser(self.lexicon)
- tree = parser.parseQuery(query)
- results = tree.executeQuery(self.index)
- if results is None:
- return [], 0
- chooser = NBest(nbest)
- chooser.addmany(results.items())
- return chooser.getbest(), len(results)
-
- def query_weight(self, query):
- parser = QueryParser(self.lexicon)
- tree = parser.parseQuery(query)
- terms = tree.terms()
- return self.index.query_weight(terms)
def reportexc():
traceback.print_exc()