[Zope-Checkins] CVS: Zope2 - UnTextIndex.py:1.33.2.4.4.6
Jim Fulton
jim@digiciool.com
Mon, 12 Mar 2001 11:46:35 -0500 (EST)
Update of /cvs-repository/Zope2/lib/python/SearchIndex
In directory korak:/tmp/cvs-serv27114
Modified Files:
Tag: Catalog-BTrees-Integration
UnTextIndex.py
Log Message:
Fixed bug in histogram method.
Based on analysis of histogram (for database containing email
messages), words with very few documents (especially one) *do*
predominate. Added back (but reduced) dictionary optimization.
--- Updated File UnTextIndex.py in package Zope2 --
--- UnTextIndex.py 2001/03/02 18:19:56 1.33.2.4.4.5
+++ UnTextIndex.py 2001/03/12 16:46:35 1.33.2.4.4.6
@@ -219,13 +219,14 @@
self._unindex=IOBTree()
convert(_unindex, self._unindex, threshold)
- def histogram(self):
+ def histogram(self, type=type, TupleType=type(())):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index."""
- histogram = IITreeSet()
+ histogram = IIBucket()
for (key, value) in self._index.items():
- entry = len(value)
+ if type(value) is TupleType: entry=1
+ else: entry = len(value)
histogram[entry] = histogram.get(entry, 0) + 1
return histogram
@@ -273,15 +274,23 @@
indexRow = (documentId, score)
index[entry] = indexRow
else:
- indexRow=IIBTree((indexRow,))
- indexRow[documentId] = score
+ indexRow={
+ indexRow[0]: indexRow[1],
+ documentId: score,
+ }
index[entry] = indexRow
- elif type(indexRow) is DictType:
- indexRow=IIBTree(indexRow)
- indexRow[documentId] = score
- index[entry] = indexRow
else:
- indexRow[documentId] = score
+ if indexRow.get(documentId, -1) != score:
+ # score changed (or new entry)
+
+ if type(indexRow) is DictType:
+ indexRow[documentId] = score
+ if len(indexRow) > 3:
+ # Big enough to give it its own database record
+ indexRow=IIBTree(indexRow)
+ index[entry] = indexRow
+ else:
+ indexRow[documentId] = score
else:
# We don't have any information at this point, so we'll
# put our first entry in, and use a tuple to save space
@@ -392,10 +401,10 @@
del widScores[i]
if widScores:
if type(widScores) is DictType:
- # Update index to use IIBTree for two
- # reasons. 1) we want an IIBTree,
- # 2) dictionart changes aren't seen.
- index[wid]=IIBTree(widScores)
+ if len(widScores) == 1:
+ # convert to tuple
+ widScores = widScores.items()[0]
+ index[wid]=widScores
else:
del index[wid]
except (KeyError, IndexError, TypeError):