[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.15
Andreas Jung
andreas@digicool.com
Wed, 16 Jan 2002 21:21:00 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv23290
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
Added detailed timing statistics for every single step in index_object()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.14 => 1.2.2.15 ===
import Proximity
import Thesaurus, StopWords
+import time
+
+
class Timer:
    """Stopwatch that prints the time spent between successive calls.

    The reference timestamp is taken when the instance is created.  Each
    call prints a label together with the number of seconds elapsed since
    the previous call (or since construction for the first call) and then
    restarts the measurement from that same instant.
    """

    def __init__(self):
        # Reference point for the first measurement.
        self.ts = time.time()

    def __call__(self, s):
        """Print ``s`` and the seconds elapsed since the last reference point.

        s -- label identifying the step that has just finished.
        """
        # Read the clock exactly once: using the same value both for the
        # elapsed time and for the new reference point guarantees that no
        # time slips unaccounted between two consecutive measurements.
        now = time.time()
        diff = now - self.ts
        self.ts = now
        # Single parenthesised argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("%s: %5.5lf" % (s, diff))
+
class QueryException(Exception): pass
@@ -335,6 +348,9 @@
def index_object(self, documentId, obj, threshold=None):
+
+ T = Timer()
+
try:
source = getattr(obj, self.id)
if callable(source): source = str(source())
@@ -354,17 +370,20 @@
encoding = 'latin1'
+ T("encoding")
+
# Split the text into a list of words
# The splitterfunc just returns an iterator-like object.
words = self._v_splitterfunc(source,encoding=encoding).split()
+ T("Splitter")
# apply stopwords list
# Maybe this should go into a C extension for performance reasons
isStopWord = self._stopwords.has_key
words = filter(lambda x,f=isStopWord: f(x)==0, words)
-
+ T("Stopwords")
# Check if we want proximity searches. If yes, we need to create
# a list containing the proximity representations of the words
@@ -376,11 +395,14 @@
self.insertProximityEntries(proximity_widList,documentId)
+ T("Proximity")
+
# Stem all words in one run
if self._v_stemmerfunc:
words = self._v_stemmerfunc(words)
+ T("Stemmer")
# We pass the list of words to the corresponding lexicon
# and obtain a list of wordIds. The "old" TextIndex iterated
@@ -388,12 +410,15 @@
widLst = self._v_getWordIdList(words)
assert len(widLst)==len(words)
+ T("Widlist")
# insert forward entries
self._v_insertForwardEntry(widLst,None,documentId)
+ T("ForwardEntries")
# insert backward entries
self.insertBackwardEntries(widLst,documentId)
+ T("BackwardEntries")
return len(widLst)