[Zope-CVS] CVS: Products/ZCTextIndex - INbest.py:1.1.2.1 NBest.py:1.1.2.1
Tim Peters
tim.one@comcast.net
Wed, 1 May 2002 17:03:19 -0400
Update of /cvs-repository/Products/ZCTextIndex
In directory cvs.zope.org:/tmp/cvs-serv6649
Added Files:
Tag: TextIndexDS9-branch
INbest.py NBest.py
Log Message:
Adding an NBest object to remember the N best-scoring objects passed to
it. This is intended to be used for final result lists. The dirt-simple
sorted-list + bisect.bisect implementation has the right theoretical
worst-case number of comparisons, but for large N a min-heap would
eventually win on data-movement cost. But don't optimize this
prematurely! I suspect it will work fine as-is.
=== Added File Products/ZCTextIndex/INbest.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""NBest Interface.
An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method. If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""
import Interface
class INBest(Interface.Base):
"""Interface for an N-Best chooser."""
def add(item, score):
"""Record that item 'item' has score 'score'. No return value.
The N best-scoring items are remembered, where N was passed to
the constructor. 'item' can by anything. 'score' should be
a number, and larger numbers are considered better.
"""
def getbest():
"""Return the (at most) N best-scoring items as a sequence.
The return value is a sequence of 2-tuples, (item, score), with
the largest score first. If .add() has been called fewer than
N times, this sequence will contain fewer than N pairs.
"""
def __len__():
"""Return the number of (item, score) pairs currently known.
This is N (the value passed to the constructor), unless .add()
has been called fewer than N times.
"""
def capacity():
"""Return the maximum number of (item, score) pairs.
This is N (the value passed to the constructor).
"""
=== Added File Products/ZCTextIndex/NBest.py ===
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""NBest
An NBest object remembers the N best-scoring items ever passed to its
.add(item, score) method. If .add() is called M times, the worst-case
number of comparisons performed overall is M * log2(N).
"""
from bisect import bisect
from Products.ZCTextIndex.INBest import INBest
class NBest:
__implements__ = INBest
def __init__(self, N):
"Build an NBest object to remember the N best-scoring objects."
if N < 1:
raise ValueError("NBest() argument must be at least 1")
self._capacity = N
# This does a very simple thing with sorted lists. For large
# N, a min-heap can be unboundedly better in terms of data
# movement time.
self.scores = []
self.items = []
def __len__(self):
return len(self.scores)
def capacity(self):
return self._capacity
def add(self, item, score):
scores = self.scores
# When we're in steady-state, the usual case is that we're filled
# to capacity, and that an incoming item is worse than any of
# the best-seen so far.
if len(scores) >= self._capacity and score <= scores[0]:
return
i = bisect(scores, score)
scores.insert(i, score)
self.items.insert(i, item)
if len(scores) > self._capacity:
del scores[0]
del self.items[0]
def getbest(self):
n = len(self.scores)
return [(self.items[i], self.scores[i])
for i in range(n-1, -1, -1)]