[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.19
Tim Peters
tim.one@comcast.net
Fri, 3 May 2002 02:05:34 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv6973
Modified Files:
Tag: TextIndexDS9-branch
Index.py
Log Message:
query_term_weight() and its use in search(): the latter was doing
truncating integer division, and then redundantly converting the
result to int. Instead changed the former to return a float, so the
division can get rounded properly.
=== Products/ZCTextIndex/Index.py 1.1.2.18 => 1.1.2.19 ===
self._wordinfo = IOBTree()
- # docid -> W
+ # docid -> W(docid)
self._docweight = IIBTree()
# docid -> [ wid ]
@@ -59,7 +59,8 @@
# I(d, q) = the intersection of the terms in d and q.
#
# w(d, t) = 1 + log f(d, t)
- # computed by doc_term_weight()
+ # computed by doc_term_weight(); for a given word t,
+ # self._wordinfo[t] is a map from d to w(d, t)
#
# w(q, t) = log(1 + N/f(t))
# computed by query_term_weight()
@@ -89,13 +90,14 @@
def search(self, term):
wids = self._lexicon.termToWordIds(term)
result = IIBucket()
- N = len(self._docweight)
+ N = float(len(self._docweight))
for wid in wids:
- map = self._wordinfo[wid]
- idf = query_term_weight(len(map), N)
- for docid, tf in map.items():
+ d2w = self._wordinfo[wid] # maps docid to w(docid, wid)
+ idf = query_term_weight(len(d2w), N) # this is an unscaled float
+ for docid, tf in d2w.items():
+ # scaled int * unscaled float / scaled int -> unscaled float
w = tf * idf / self._docweight[docid]
- result[docid] = int(result.get(docid, 0) + w)
+ result[docid] = result.get(docid, 0) + scaled_int(w)
return result
def query_weight(self, terms):
@@ -173,4 +175,4 @@
total items.
"""
# implements w(q, t) = log(1 + N/f(t))
- return scaled_int(math.log(1 + float(num_items) / term_count))
+ return math.log(1. + float(num_items) / term_count)