[Zope3-checkins]
SVN: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/
Updated IInjection to emphasize indexing of values (for documents),
Jim Fulton
jim at zope.com
Mon Dec 6 10:04:19 EST 2004
Log message for revision 28574:
Updated IInjection to emphasize indexing of values (for documents),
rather than documents.
Added IIndexSearch, which provides a search that returns integer sets
or mappings.
Updated field indexes to provide IIndexSearch as their only search
method.
Replaced the field-index tests with a doctest.
Changed:
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests/
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
-=-
Added: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt 2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/README.txt 2004-12-06 15:04:19 UTC (rev 28574)
@@ -0,0 +1,100 @@
+Field Indexes
+=============
+
+Field indexes index orderable values. Note that they don't check for
+orderability. That is, all of the values added to the index must be
+orderable together. It is up to applications to provide only mutually
+orderable values.
+
+ >>> from zope.index.field import FieldIndex
+
+ >>> index = FieldIndex()
+ >>> index.index_doc(0, 6)
+ >>> index.index_doc(1, 26)
+ >>> index.index_doc(2, 94)
+ >>> index.index_doc(3, 68)
+ >>> index.index_doc(4, 30)
+ >>> index.index_doc(5, 68)
+ >>> index.index_doc(6, 82)
+ >>> index.index_doc(7, 30)
+ >>> index.index_doc(8, 43)
+ >>> index.index_doc(9, 15)
+
+Field indexes are searched with apply_index. The argument is a tuple
+with a minimum and maximum value:
+
+ >>> index.apply_index((30, 70))
+ IISet([3, 4, 5, 7, 8])
+
+Open-ended ranges can be provided by providing None as an end point:
+
+ >>> index.apply_index((30, None))
+ IISet([2, 3, 4, 5, 6, 7, 8])
+
+ >>> index.apply_index((None, 70))
+ IISet([0, 1, 3, 4, 5, 7, 8, 9])
+
+ >>> index.apply_index((None, None))
+ IISet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+To do an exact value search, supply equal minimum and maximum values:
+
+ >>> index.apply_index((30, 30))
+ IISet([4, 7])
+
+ >>> index.apply_index((70, 70))
+ IISet([])
+
+Field indexes support basic statistics:
+
+ >>> index.documentCount()
+ 10
+ >>> index.wordCount()
+ 8
+
+Documents can be reindexed:
+
+ >>> index.apply_index((15, 15))
+ IISet([9])
+ >>> index.index_doc(9, 14)
+
+ >>> index.apply_index((15, 15))
+ IISet([])
+ >>> index.apply_index((14, 14))
+ IISet([9])
+
+Documents can be unindexed:
+
+ >>> index.unindex_doc(7)
+ >>> index.documentCount()
+ 9
+ >>> index.wordCount()
+ 8
+ >>> index.unindex_doc(8)
+ >>> index.documentCount()
+ 8
+ >>> index.wordCount()
+ 7
+
+ >>> index.apply_index((30, 70))
+ IISet([3, 4, 5])
+
+Unindexing a document id that isn't present is ignored:
+
+ >>> index.unindex_doc(8)
+ >>> index.unindex_doc(80)
+ >>> index.documentCount()
+ 8
+ >>> index.wordCount()
+ 7
+
+We can also clear the index entirely:
+
+ >>> index.clear()
+ >>> index.documentCount()
+ 0
+ >>> index.wordCount()
+ 0
+
+ >>> index.apply_index((30, 70))
+ IISet([])
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py 2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/index.py 2004-12-06 15:04:19 UTC (rev 28574)
@@ -15,24 +15,25 @@
$Id$
"""
-from persistent import Persistent
+import persistent
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
-from BTrees.IIBTree import IITreeSet, IISet, union
+from BTrees.IIBTree import IITreeSet, IISet, multiunion
from BTrees.Length import Length
-from types import ListType, TupleType
-from zope.interface import implements
+import zope.interface
-from zope.index.interfaces import IInjection, ISimpleQuery
-from zope.index.interfaces import IStatistics, IRangeQuerying
+from zope.index import interfaces
+class FieldIndex(persistent.Persistent):
-class FieldIndex(Persistent):
+ zope.interface.implements(
+ interfaces.IInjection,
+ interfaces.IStatistics,
+ interfaces.IIndexSearch,
+ )
- implements(IRangeQuerying, IInjection, ISimpleQuery, IStatistics)
-
def __init__(self):
self.clear()
@@ -52,71 +53,47 @@
"""See interface IStatistics"""
return len(self._fwd_index)
- def has_doc(self, docid):
- return bool(self._rev_index.has_key(docid))
-
def index_doc(self, docid, value):
"""See interface IInjection"""
- if self.has_doc(docid): # unindex doc if present
+ rev_index = self._rev_index
+ if docid in rev_index:
+ # unindex doc if present
self.unindex_doc(docid)
- self._insert_forward(docid, value)
- self._insert_reverse(docid, value)
+ # Insert into forward index.
+ set = self._fwd_index.get(value)
+ if set is None:
+ set = IITreeSet()
+ self._fwd_index[value] = set
+ set.insert(docid)
+
+ # increment doc count
+ self._num_docs.change(1)
+
+ # Insert into reverse index.
+ rev_index[docid] = value
+
def unindex_doc(self, docid):
"""See interface IInjection"""
- try: # ignore non-existing docids, don't raise
- value = self._rev_index[docid]
- except KeyError:
- return
+ rev_index = self._rev_index
+ value = rev_index.get(docid)
+ if value is None:
+ return # not in index
- del self._rev_index[docid]
+ del rev_index[docid]
try:
- self._fwd_index[value].remove(docid)
- if len(self._fwd_index[value]) == 0:
- del self._fwd_index[value]
+ set = self._fwd_index[value]
+ set.remove(docid)
except KeyError:
+ # This is fishy, but we don't want to raise an error.
+ # We should probably log something.
pass
- self._num_docs.change(-1)
- def search(self, values):
- "See interface ISimpleQuerying"
- # values can either be a single value or a sequence of
- # values to be searched.
- if isinstance(values, (ListType, TupleType)):
- result = IISet()
- for value in values:
- try:
- r = IISet(self._fwd_index[value])
- except KeyError:
- continue
- # the results of all subsearches are combined using OR
- result = union(result, r)
- else:
- try:
- result = IISet(self._fwd_index[values])
- except KeyError:
- result = IISet()
+ if not set:
+ del self._fwd_index[value]
- return result
+ self._num_docs.change(-1)
- def query(self, querytext, start=0, count=None):
- """See interface IQuerying"""
- res = self.search(querytext)
- if start or count:
- res = res[start:start+count]
- return res
-
- def rangesearch(self, minvalue, maxvalue):
- return IISet(self._fwd_index.keys(minvalue, maxvalue))
-
- def _insert_forward(self, docid, value):
- """Insert into forward index."""
- if not self._fwd_index.has_key(value):
- self._fwd_index[value] = IITreeSet()
- self._fwd_index[value].insert(docid)
- self._num_docs.change(1)
-
- def _insert_reverse(self, docid, value):
- """Insert into reverse index."""
- self._rev_index[docid] = value
+ def apply_index(self, query):
+ return multiunion(self._fwd_index.values(*query))
Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py (from rev 28563, Zope3/trunk/src/zope/index/field/tests/test_fieldindex.py)
===================================================================
--- Zope3/trunk/src/zope/index/field/tests/test_fieldindex.py 2004-12-04 19:04:40 UTC (rev 28563)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/field/tests.py 2004-12-06 15:04:19 UTC (rev 28574)
@@ -0,0 +1,25 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Test field index
+
+$Id$
+"""
+
+def test_suite():
+ from zope.testing.doctest import DocFileSuite
+ return DocFileSuite('README.txt')
+
+if __name__=='__main__':
+ import unittest
+ unittest.main(defaultTest='test_suite')
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py 2004-12-06 14:50:20 UTC (rev 28573)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py 2004-12-06 15:04:19 UTC (rev 28574)
@@ -21,11 +21,13 @@
class IInjection(Interface):
"""Interface for injecting documents into an index."""
- def index_doc(docid, doc):
+ def index_doc(docid, value):
"""Add a document to the index.
docid: int, identifying the document
- doc: the document to be indexed
+
+ value: the value to be indexed
+
return: None
This can also be used to reindex documents.
@@ -35,6 +37,7 @@
"""Remove a document from the index.
docid: int, identifying the document
+
return: None
This call is a no-op if the docid isn't in the index, however,
@@ -45,6 +48,40 @@
"""Unindex all documents indexed by the index
"""
+class IIndexSearch(Interface):
+
+ def apply_index(query):
+ """Apply an index to the given query
+
+ The type of the query is index specific.
+
+ TODO
+ This is somewhat problematic. It means that application
+ code that calls apply_index has to be aware of the
+ expected query type. This isn't too much of a problem now,
+ as we have no more general query language nor do we have
+ any sort of automatic query-form generation.
+
+ It would be nice to have a system later for having
+ query-form generation or, perhaps, some sort of query
+ language. At that point, we'll need some sort of way to
+ determine query types, presumably through introspection of
+ the index objects.
+
+ A result is returned that is:
+
+ - An IIBTree or an IIBucket mapping document ids to integer
+ scores for document ids of documents that match the query,
+
+ - An IISet or IITreeSet containing document ids of documents
+ that match the query, or
+
+ - None, indicating that the index could not use the query and
+ that the result should have no impact on determining a final
+ result.
+
+ """
+
class IQuerying(Interface):
"""An index that can be queried by some text and returns a result set."""
@@ -113,22 +150,6 @@
query.
"""
-class IRangeQuerying(Interface):
- """Query over a range of objects."""
-
- def rangesearch(minval, maxval):
- """Execute a range search.
-
- Return an IISet of docids for all docs where
-
- minval <= value <= maxval if minval<=maxval and
- both minval and maxval are not None
-
- Value <= maxval if minval is not None
-
- value >= minval if maxval is not None
- """
-
class IKeywordQuerying(Interface):
"""Query over a set of keywords, separated by white space."""
More information about the Zope3-Checkins
mailing list