[Zope3-checkins] CVS: Zope3/lib/python/Zope/TextIndex - TextIndexWrapper.py:1.1

Guido van Rossum guido@python.org
Tue, 3 Dec 2002 11:45:24 -0500


Update of /cvs-repository/Zope3/lib/python/Zope/TextIndex
In directory cvs.zope.org:/tmp/cvs-serv26041

Added Files:
	TextIndexWrapper.py 
Log Message:
Primitive TextIndex wrapper.
XXX There still is an issue with batching when the scores are the same.

=== Added File Zope3/lib/python/Zope/TextIndex/TextIndexWrapper.py ===
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Text index wrapper.

This exists to implement IInjection and IQuerying.

$Id: TextIndexWrapper.py,v 1.1 2002/12/03 16:45:23 gvanrossum Exp $
"""

from Persistence import Persistent

from TextIndexInterfaces import IInjection, IQuerying

from Zope.TextIndex.OkapiIndex import OkapiIndex
from Zope.TextIndex.Lexicon import Lexicon
from Zope.TextIndex.Lexicon import Splitter, CaseNormalizer, StopWordRemover
from Zope.TextIndex.QueryParser import QueryParser
from Zope.TextIndex.NBest import NBest

class TextIndexWrapper(Persistent):
    """Persistent wrapper bundling a lexicon and a text index.

    The injection methods (index_doc, unindex_doc) delegate to the
    wrapped index.  query() parses a query string with the lexicon,
    executes it against the index and returns one batch of hits whose
    scores have been divided by the query weight.
    """

    __implements__ = (IInjection, IQuerying)

    def __init__(self, lexicon=None, index=None):
        """Provisional constructor.

        This creates the lexicon and index if not passed in.

        lexicon -- lexicon to use; when None, a Lexicon is built from a
                   Splitter, a CaseNormalizer and a StopWordRemover.
        index   -- index to use; when None, an OkapiIndex is created
                   over the lexicon.

        Note that query() parses with self.lexicon, so a caller that
        passes its own index should pass the lexicon that goes with it.
        """
        if lexicon is None:
            lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
        if index is None:
            index = OkapiIndex(lexicon)
        self.lexicon = lexicon
        self.index = index

    # Methods implementing IInjection

    def index_doc(self, docid, text):
        """Index `text` under `docid` by delegating to the wrapped index."""
        self.index.index_doc(docid, text)
        # XXX why is this needed?
        # Presumably because the change happens inside self.index and not
        # on an attribute of this object -- TODO confirm.
        self._p_changed = 1

    def unindex_doc(self, docid):
        """Remove `docid` from the wrapped index."""
        self.index.unindex_doc(docid)
        # XXX why is this needed?  (see index_doc; same open question)
        self._p_changed = 1

    # Methods implementing IQuerying

    def query(self, querytext, start, count):
        """Run a text query and return one batch of scored hits.

        querytext -- query string, parsed by QueryParser with self.lexicon.
        start     -- offset of the first hit of the batch.
        count     -- maximum number of hits in the batch.

        Returns a pair (batch, total).  batch is a list of
        (docid, score) pairs: the hits selected by NBest(start + count)
        with the first `start` entries dropped and every score divided
        by the query weight.  total is len() of the full result set.

        When executing the query yields None, ([], 0) is returned.

        XXX There still is an issue with batching when the scores are
        the same.
        """
        parser = QueryParser(self.lexicon)
        tree = parser.parseQuery(querytext)
        results = tree.executeQuery(self.index)
        if results is None:
            return [], 0

        # Keep only as many hits as are needed to reach the end of the
        # requested batch, then drop the ones before its start.
        chooser = NBest(start + count)
        chooser.addmany(results.items())
        batch = chooser.getbest()[start:]

        # Multiply by 1.0 to force float division below.
        qw = 1.0 * self.index.query_weight(tree.terms())
        if qw == 0:
            # A zero query weight used to raise ZeroDivisionError here;
            # fall back to returning the scores unscaled instead.
            qw = 1.0

        batch = [(docid, score / qw) for docid, score in batch]
        return batch, len(results)