[Zope-CVS] CVS: Products/ZCTextIndex - Index.py:1.1.2.1
Fred L. Drake, Jr.
fdrake@acm.org
Tue, 30 Apr 2002 16:19:44 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv16959
Added Files:
Tag: TextIndexDS9-branch
Index.py
Log Message:
First portion of new index.
=== Added File Products/ZCTextIndex/Index.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Text Index.
Plugin text index for ZCatalog, with relevance ranking.
Revision information:
$Id: Index.py,v 1.1.2.1 2002/04/30 20:19:43 fdrake Exp $
"""
import math
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IIBTree, IISet
class Index:
    """Inverted text index over a single attribute of the indexed objects.

    Three mappings are maintained:

    ``_wordinfo``
        wid -> (doc-frequency, {docid -> frequency}).  The doc-frequency
        is meant to equal the number of entries in the inner mapping.
    ``_docweight``
        docid -> W, the integer document weight computed by
        ``_get_frequencies``.
    ``_docwords``
        docid -> set of the distinct wids found in that document; kept so
        that a document can be un-indexed without its original text.
    """

    def __init__(self, lexicon, fieldname):
        """Create an empty index.

        lexicon -- object whose ``textToWordIDs(text)`` method turns text
            into a sequence of word ids (wids).
        fieldname -- name of the attribute (or zero-argument method) that
            holds the text of each indexed object.
        """
        self._lexicon = lexicon
        self._fieldname = fieldname
        # wid -> ( doc-frequency, { docid -> frequency } )
        self._wordinfo = IOBTree()
        # docid -> W
        self._docweight = IIBTree()
        # docid -> set of wids
        # used for un-indexing
        self._docwords = IOBTree()

    def index_object(self, docid, obj, threshold=None):
        """Index the text of ``obj`` under ``docid``.

        If ``docid`` is already indexed, its previous postings are removed
        first, so that words no longer present in the document do not
        linger and document frequencies are not counted twice.

        ``threshold`` is accepted but not used by this implementation.
        """
        # Tokenize before touching the index: if extracting or splitting
        # the text fails, any existing entry for docid is left intact.
        wids = self._lexicon.textToWordIDs(self._get_object_text(obj))
        freqs, docweight = self._get_frequencies(wids)
        if docid in self._docwords:
            self.unindex_object(docid)
        for wid, f in freqs:
            self._add_wordinfo(wid, f, docid)
        self._docweight[docid] = docweight
        self._docwords[docid] = IISet([wid for wid, f in freqs])

    def unindex_object(self, docid):
        """Remove every trace of ``docid`` from the index.

        Raises KeyError if ``docid`` is not currently indexed.
        """
        for wid in self._docwords[docid]:
            self._del_wordinfo(wid, docid)
        del self._docwords[docid]
        del self._docweight[docid]

    def _get_object_text(self, obj):
        """Return the indexed field of ``obj``, calling it if callable."""
        x = getattr(obj, self._fieldname)
        if callable(x):
            return x()
        return x

    def _get_frequencies(self, wids):
        """Return ``(freqs, docweight)`` for a sequence of wids.

        ``freqs`` is a list of ``(wid, f)`` pairs, one per distinct wid,
        where ``f`` is ``frequency(occurrence count)``.  ``docweight`` is
        the square root of the sum of the squared ``f`` values, truncated
        to an int (it is stored in an IIBTree by ``index_object``).
        """
        counts = {}
        for wid in wids:
            counts[wid] = counts.get(wid, 0) + 1
        Wsquares = 0
        freqs = []
        for wid, count in counts.items():
            f = frequency(count)
            Wsquares += f ** 2
            freqs.append((wid, f))
        return freqs, int(math.sqrt(Wsquares))

    def _add_wordinfo(self, wid, f, docid):
        """Record that ``docid`` contains ``wid`` with frequency ``f``.

        The document frequency is only incremented when ``docid`` did not
        already have a posting for ``wid``; overwriting an existing
        posting just replaces its frequency.
        """
        try:
            docfreq, docmap = self._wordinfo[wid]
        except KeyError:
            docfreq = 0
            docmap = IIBTree()
        if docid not in docmap:
            docfreq += 1
        docmap[docid] = f
        # Re-assign the tuple so the change is registered on _wordinfo.
        self._wordinfo[wid] = docfreq, docmap

    def _del_wordinfo(self, wid, docid):
        """Remove the posting of ``docid`` for ``wid``.

        When the recorded document frequency is 1 the whole ``wid`` entry
        is dropped; otherwise the posting is deleted and the document
        frequency decremented.  Raises KeyError if ``wid`` is unknown.
        """
        docfreq, docmap = self._wordinfo[wid]
        if docfreq == 1:
            del self._wordinfo[wid]
            return
        del docmap[docid]
        self._wordinfo[wid] = docfreq - 1, docmap
def frequency(count):
    """Map a raw occurrence count to the frequency value stored in the index.

    Currently the identity function: the count is returned unchanged.
    """
    weight = count
    return weight