[Zope-Checkins] CVS: Zope/lib/python/Products/ZCTextIndex - BaseIndex.py:1.29.12.1 CosineIndex.py:1.22.74.1 IIndex.py:1.11.70.1 OkapiIndex.py:1.29.74.3
Casey Duncan
casey@zope.com
Thu, 5 Jun 2003 15:02:51 -0400
Update of /cvs-repository/Zope/lib/python/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv14371
Modified Files:
Tag: casey-zctextindex-fewer-conflicts-branch
BaseIndex.py CosineIndex.py IIndex.py OkapiIndex.py
Log Message:
Fix a flaw in the query mechanism by replacing a len(BTree) call with a cached length lookup. This should improve performance and scalability.
Add a document_count method for this purpose. This method is overridden by a BTrees.Length.Length object in instances.
Added code to opportunistically cache the document count when the index is changed.
Added tests for length, and upgrade code
Updated interface
=== Zope/lib/python/Products/ZCTextIndex/BaseIndex.py 1.29 => 1.29.12.1 ===
--- Zope/lib/python/Products/ZCTextIndex/BaseIndex.py:1.29 Tue Feb 4 13:29:41 2003
+++ Zope/lib/python/Products/ZCTextIndex/BaseIndex.py Thu Jun 5 15:02:20 2003
@@ -20,7 +20,7 @@
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IIBTree, IIBucket, IITreeSet
from BTrees.IIBTree import intersection, difference
-import BTrees.Length
+from BTrees.Length import Length
from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
@@ -83,12 +83,18 @@
self._docwords = IOBTree()
# Use a BTree length for efficient length computation w/o conflicts
- self.length = BTrees.Length.Length()
+ self.length = Length()
+ self.document_count = Length()
def length(self):
"""Return the number of words in the index."""
# This is overridden per instance
return len(self._wordinfo)
+
+ def document_count(self):
+ """Return the number of documents in the index"""
+ # This is overridden per instance
+ return len(self._docweight)
def get_words(self, docid):
"""Return a list of the wordids for a given docid."""
@@ -104,6 +110,11 @@
self._mass_add_wordinfo(wid2weight, docid)
self._docweight[docid] = docweight
self._docwords[docid] = WidCode.encode(wids)
+ try:
+ self.document_count.change(1)
+ except AttributeError:
+ # Upgrade document_count to Length object
+ self.document_count = Length(self.document_count())
return len(wids)
# A subclass may wish to extend or override this. This is for adjusting
@@ -165,6 +176,11 @@
self._del_wordinfo(wid, docid)
del self._docwords[docid]
del self._docweight[docid]
+ try:
+ self.document_count.change(-1)
+ except AttributeError:
+ # Upgrade document_count to Length object
+ self.document_count = Length(self.document_count())
def search(self, term):
wids = self._lexicon.termToWordIds(term)
=== Zope/lib/python/Products/ZCTextIndex/CosineIndex.py 1.22 => 1.22.74.1 ===
--- Zope/lib/python/Products/ZCTextIndex/CosineIndex.py:1.22 Tue May 28 19:42:20 2002
+++ Zope/lib/python/Products/ZCTextIndex/CosineIndex.py Thu Jun 5 15:02:20 2003
@@ -69,7 +69,7 @@
def _search_wids(self, wids):
if not wids:
return []
- N = float(len(self._docweight))
+ N = float(self.document_count())
L = []
DictType = type({})
for wid in wids:
@@ -86,7 +86,7 @@
wids = []
for term in terms:
wids += self._lexicon.termToWordIds(term)
- N = float(len(self._docweight))
+ N = float(self.document_count())
sum = 0.0
for wid in self._remove_oov_wids(wids):
wt = inverse_doc_frequency(len(self._wordinfo[wid]), N)
=== Zope/lib/python/Products/ZCTextIndex/IIndex.py 1.11 => 1.11.70.1 ===
--- Zope/lib/python/Products/ZCTextIndex/IIndex.py:1.11 Wed Aug 14 18:25:14 2002
+++ Zope/lib/python/Products/ZCTextIndex/IIndex.py Thu Jun 5 15:02:20 2003
@@ -20,6 +20,9 @@
"""Interface for an Index."""
def length():
+ """Return the number of words in the index."""
+
+ def document_count():
"""Return the number of documents in the index."""
def get_words(docid):
@@ -62,10 +65,13 @@
"""
def index_doc(docid, text):
- "XXX"
+ """Add a document with the specified id and text to the index. If a
+ document by that id already exists, replace its text with the new
+ text provided
+ """
def unindex_doc(docid):
- "XXX"
+ """Remove the document with the specified id from the index"""
def has_doc(docid):
"""Returns true if docid is an id of a document in the index"""
=== Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py 1.29.74.2 => 1.29.74.3 ===
--- Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py:1.29.74.2 Thu Jun 5 00:41:58 2003
+++ Zope/lib/python/Products/ZCTextIndex/OkapiIndex.py Thu Jun 5 15:02:20 2003
@@ -86,7 +86,7 @@
def _search_wids(self, wids):
if not wids:
return []
- N = float(len(self._docweight)) # total # of docs
+ N = float(self.document_count()) # total # of docs
try:
doclen = self._totaldoclen()
except TypeError:
@@ -135,7 +135,7 @@
def _search_wids(self, wids):
if not wids:
return []
- N = float(len(self._docweight)) # total # of docs
+ N = float(self.document_count()) # total # of docs
try:
doclen = self._totaldoclen()
except TypeError: