[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.19

Tim Peters tim.one@comcast.net
Fri, 3 May 2002 02:05:34 -0400


Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv6973

Modified Files:
      Tag: TextIndexDS9-branch
	Index.py 
Log Message:
query_term_weight() and its use in search():  search() was doing
truncating integer division, and then redundantly converting the
result to int.  query_term_weight() now returns a float instead, so
the result of the division can be rounded properly.


=== Products/ZCTextIndex/Index.py 1.1.2.18 => 1.1.2.19 ===
         self._wordinfo = IOBTree()
 
-        # docid -> W
+        # docid -> W(docid)
         self._docweight = IIBTree()
 
         # docid -> [ wid ]
@@ -59,7 +59,8 @@
     #    I(d, q) = the intersection of the terms in d and q.
     #
     #    w(d, t) = 1 + log f(d, t)
-    #        computed by doc_term_weight()
+    #        computed by doc_term_weight(); for a given word t,
+    #        self._wordinfo[t] is a map from d to w(d, t)
     #
     #    w(q, t) = log(1 + N/f(t))
     #        computed by query_term_weight()
@@ -89,13 +90,14 @@
     def search(self, term):
         wids = self._lexicon.termToWordIds(term)
         result = IIBucket()
-        N = len(self._docweight)
+        N = float(len(self._docweight))
         for wid in wids:
-            map = self._wordinfo[wid]
-            idf = query_term_weight(len(map), N)
-            for docid, tf in map.items():
+            d2w = self._wordinfo[wid] # maps docid to w(docid, wid)
+            idf = query_term_weight(len(d2w), N)  # this is an unscaled float
+            for docid, tf in d2w.items():
+                # scaled int * unscaled float / scaled int -> unscaled float
                 w = tf * idf / self._docweight[docid]
-                result[docid] = int(result.get(docid, 0) + w)
+                result[docid] = result.get(docid, 0) + scaled_int(w)
         return result
 
     def query_weight(self, terms):
@@ -173,4 +175,4 @@
     total items.
     """
     # implements w(q, t) = log(1 + N/f(t))
-    return scaled_int(math.log(1 + float(num_items) / term_count))
+    return math.log(1. + float(num_items) / term_count)