[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.16
Tim Peters
tim.one@comcast.net
Fri, 3 May 2002 01:32:02 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv24588
Modified Files:
Tag: TextIndexDS9-branch
Index.py
Log Message:
_get_frequencies(): return a triple instead of a pair, to increase
efficiency (zipping the wids and freqs together by hand just so the
tuples can get picked apart again on the other end is a waste of
precious tuples <wink>).
=== Products/ZCTextIndex/Index.py 1.1.2.15 => 1.1.2.16 ===
def index_doc(self, docid, text, threshold=None):
wids = self._lexicon.sourceToWordIds(text)
- freqs, docweight = self._get_frequencies(wids)
- uniqwids = []
- for wid, f in freqs:
- self._add_wordinfo(wid, f, docid)
- uniqwids.append(wid)
+ uniqwids, freqs, docweight = self._get_frequencies(wids)
+ for i in range(len(uniqwids)):
+ self._add_wordinfo(uniqwids[i], freqs[i], docid)
self._docweight[docid] = docweight
self._docwords[docid] = IISet(uniqwids)
@@ -114,17 +112,20 @@
def _get_frequencies(self, wids):
"""Return individual doc-term weights and docweight."""
# computes w(d, t) for each term, and W(d)
- # returns pairt [(wid0, w(d, wid0)), (wid1, w(d, wid1)), ...], W(d)
+ # return triple
+ # [wid0, wid1, ...]
+ # [w(d, wid0), w(d, wid1), ...],
+ # W(d)
d = {}
for wid in wids:
d[wid] = d.get(wid, 0) + 1
Wsquares = 0.
freqs = []
- for wid, count in d.items():
+ for count in d.values():
f = doc_term_weight(count)
Wsquares += f * f
- freqs.append((wid, scaled_int(f)))
- return freqs, scaled_int(math.sqrt(Wsquares))
+ freqs.append(scaled_int(f))
+ return d.keys(), freqs, scaled_int(math.sqrt(Wsquares))
def _add_wordinfo(self, wid, f, docid):
try: