[Zope3-checkins] SVN: Zope3/branches/jim-index-restructure-2004-12/src/zope/ - Removed the unused pipeline-element framework. WHUI

Jim Fulton jim at zope.com
Tue Dec 7 13:15:58 EST 2004


Log message for revision 28577:
  - Removed the unused pipeline-element framework. WHUI
  
  - Moved the nbest code out of text, as it should generally be
    used by applications that call indexes, not by the indexes
    themselves.
  
  - Moved the text-indexing interfaces into text/interfaces.py.
  
  - Converted the interfaces package into a module
  

Changed:
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py
  A   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py
  D   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py
  U   Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py

-=-
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt	2004-12-07 18:15:57 UTC (rev 28577)
@@ -189,20 +189,20 @@
         searchableText. 
 
 For text indexes, one generally uses
-`zope.index.interfaces.searchabletext.ISearchableText`,
+`zope.index.text.interfaces.ISearchableText`,
 `getSearchableText` and True.
 
   >>> print http(r"""
   ... POST /++etc++site/default/Catalog/+/AddTextIndex%3D HTTP/1.1
   ... Authorization: Basic bWdyOm1ncnB3
-  ... Content-Length: 1003
+  ... Content-Length: 1008
   ... Content-Type: multipart/form-data; boundary=---------------------------12609588153518590761493918424
   ... Referer: http://localhost:8081/++etc++site/default/Catalog/+/AddTextIndex=
   ... 
   ... -----------------------------12609588153518590761493918424
   ... Content-Disposition: form-data; name="field.interface"
   ... 
-  ... zope.index.interfaces.searchabletext.ISearchableText
+  ... zope.index.text.interfaces.ISearchableText
   ... -----------------------------12609588153518590761493918424
   ... Content-Disposition: form-data; name="field.interface-empty-marker"
   ... 

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -16,7 +16,7 @@
 $Id$
 """
 
-from zope.index.interfaces.searchabletext import ISearchableText
+from zope.index.text.interfaces import ISearchableText
 from zope.app.tests import ztapi
 from zope.app.tests.placelesssetup import PlacelessSetup
 from zope.app.zptpage.interfaces import IZPTPage

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -17,7 +17,7 @@
 
 from zope.interface import implements
 from zope.app.zptpage.interfaces import IZPTPage
-from zope.index.interfaces.searchabletext import ISearchableText
+from zope.index.text.interfaces import ISearchableText
 import re
 
 tag = re.compile(r"<[^>]+>")

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,204 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Basic interfaces shared between different types of index.
-
-$Id$
-"""
-from zope.interface import Interface
-
-
-class IInjection(Interface):
-    """Interface for injecting documents into an index."""
-
-    def index_doc(docid, value):
-        """Add a document to the index.
-
-        docid: int, identifying the document
-
-        value: the value to be indexed
-
-        return: None
-
-        This can also be used to reindex documents.
-        """
-
-    def unindex_doc(docid):
-        """Remove a document from the index.
-
-        docid: int, identifying the document
-
-        return: None
-
-        This call is a no-op if the docid isn't in the index, however,
-        after this call, the index should have no references to the docid.
-        """
-
-    def clear():
-        """Unindex all documents indexed by the index
-        """
-
-class IIndexSearch(Interface):
-
-    def apply(query):
-        """Apply an index to the given query
-
-        The type if the query is index specific.
-
-        TODO
-            This is somewhat problemetic. It means that application
-            code that calls apply has to be aware of the
-            expected query type. This isn't too much of a problem now,
-            as we have no more general query language nor do we have
-            any sort of automatic query-form generation.
-
-            It would be nice to have a system later for having
-            query-form generation or, perhaps, sme sort of query
-            language. At that point, we'll need some sort of way to
-            determine query types, presumably through introspection of
-            the index objects.
-
-        A result is returned that is:
-
-        - An IIBTree or an IIBucket mapping document ids to integer
-          scores for document ids of documents that match the query,
-
-        - An IISet or IITreeSet containing document ids of documents
-          that match the query, or
-
-        - None, indicating that the index could not use the query and
-          that the result should have no impact on determining a final
-          result.
-
-        """
-
-class IQuerying(Interface):
-    """An index that can be queried by some text and returns a result set."""
-
-    def query(querytext, start=0, count=None):
-        """Execute a query.
-
-        querytext: unicode, the query expression
-        start: the first result to return (0-based)
-        count: the maximum number of results to return (default: all)
-        return: ([(docid, rank), ...], total)
-
-        The return value is a tuple:
-            matches: list of (int, float) tuples, docid and rank
-            total: int, the total number of matches
-
-        The matches list represents the requested batch.  The ranks
-        are floats between 0 and 1 (inclusive).
-        """
-
-class IStatistics(Interface):
-    """An index that provides statistical information about itself."""
-
-    def documentCount():
-        """Return the number of documents currently indexed."""
-
-    def wordCount():
-        """Return the number of words currently indexed."""
-
-
-class IExtendedQuerying(Interface):
-    """An index that supports advanced search setups."""
-
-    def search(term):
-        """Execute a search on a single term given as a string.
-
-        Return an IIBTree mapping docid to score, or None if all docs
-        match due to the lexicon returning no wids for the term (e.g.,
-        if the term is entirely composed of stopwords).
-        """
-
-    def search_phrase(phrase):
-        """Execute a search on a phrase given as a string.
-
-        Return an IIBtree mapping docid to score.
-        """
-
-    def search_glob(pattern):
-        """Execute a pattern search.
-
-        The pattern represents a set of words by using * and ?.  For
-        example, "foo*" represents the set of all words in the lexicon
-        starting with "foo".
-
-        Return an IIBTree mapping docid to score.
-        """
-
-    def query_weight(terms):
-        """Return the weight for a set of query terms.
-
-        'terms' is a sequence of all terms included in the query,
-        although not terms with a not.  If a term appears more than
-        once in a query, it should appear more than once in terms.
-
-        Nothing is defined about what "weight" means, beyond that the
-        result is an upper bound on document scores returned for the
-        query.
-        """
-
-class IKeywordQuerying(Interface):
-    """Query over a set of keywords, seperated by white space."""
-
-    def search(query, operator='and'):
-        """Execute a search given by 'query' as a list/tuple of
-           (unicode) strings against the index. 'operator' can be either
-           'and' or 'or' to search for all keywords or any keyword. 
-
-           Return an IISet of docids
-        """
-
-class ITopicQuerying(Interface):
-    """Query over topics, seperated by white space."""
-
-    def search(query, operator='and'):
-        """Execute a search given by 'query' as a list/tuple of filter ids.
-          'operator' can be 'and' or 'or' to search for matches in all
-           or any filter.
-
-           Return an IISet of docids
-        """
-
-class ISimpleQuery(Interface):
-    """A simple query interface."""
-
-    def query(term, start=0, count=None):
-        """Search for the given term, return a sequence of docids"""
-
-
-class ITopicFilteredSet(Interface):
-    """Interface for filtered sets used by topic indexes."""
-    
-    def clear():
-        """Remove all entries from the index."""
-
-    def index_doc(docid, context):
-        """Add an object's info to the index."""
-
-    def unindex_doc(docid):
-        """Remove an object with id 'docid' from the index."""
-
-    def getId():
-        """Return the id of the filter itself."""
-
-    def setExpression(expr):
-        """Set the filter expression, e.g. 'context.meta_type=='...'"""
-        
-    def getExpression():
-        """Return the filter expression."""
-
-    def getIds():
-        """Return an IISet of docids."""

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,78 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Lexicon interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class ILexicon(Interface):
-    """Object responsible for converting text to word identifiers."""
-
-    def termToWordIds(text):
-        """Return a sequence of ids of the words parsed from the text.
-
-        The input text may be either a string or a list of strings.
-
-        Parse the text as if they are search terms, and skips words
-        that aren't in the lexicon.
-        """
-
-    def sourceToWordIds(text):
-        """Return a sequence of ids of the words parsed from the text.
-
-        The input text may be either a string or a list of strings.
-
-        Parse the text as if they come from a source document, and
-        creates new word ids for words that aren't (yet) in the
-        lexicon.
-        """
-
-    def globToWordIds(pattern):
-        """Return a sequence of ids of words matching the pattern.
-
-        The argument should be a single word using globbing syntax,
-        e.g. 'foo*' meaning anything starting with 'foo'.
-
-        Return the wids for all words in the lexicon that match the
-        pattern.
-        """
-
-    def wordCount():
-        """Return the number of unique terms in the lexicon."""
-
-    def get_word(wid):
-        """Return the word for the given word id.
-
-        Raise KeyError if the word id is not in the lexicon.
-        """
-
-    def get_wid(word):
-        """Return the wird id for the given word.
-
-        Return 0 of the word is not in the lexicon.
-        """
-
-    def parseTerms(text):
-        """Pass the text through the pipeline.
-
-        Return a list of words, normalized by the pipeline
-        (e.g. stopwords removed, case normalized etc.).
-        """
-
-    def isGlob(word):
-        """Return true if the word is a globbing pattern.
-
-        The word should be one of the words returned by parseTerm().
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,74 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""NBest Interface.
-
-An NBest object remembers the N best-scoring items ever passed to its
-.add(item, score) method.  If .add() is called M times, the worst-case
-number of comparisons performed overall is M * log2(N).
-
-$Id$
-"""
-
-
-from zope.interface import Interface
-
-class INBest(Interface):
-    """Interface for an N-Best chooser."""
-
-    def add(item, score):
-        """Record that item 'item' has score 'score'.  No return value.
-
-        The N best-scoring items are remembered, where N was passed to
-        the constructor.  'item' can by anything.  'score' should be
-        a number, and larger numbers are considered better.
-        """
-
-    def addmany(sequence):
-        """Like "for item, score in sequence: self.add(item, score)".
-
-        This is simply faster than calling add() len(seq) times.
-        """
-
-    def getbest():
-        """Return the (at most) N best-scoring items as a sequence.
-
-        The return value is a sequence of 2-tuples, (item, score), with
-        the largest score first.  If .add() has been called fewer than
-        N times, this sequence will contain fewer than N pairs.
-        """
-
-    def pop_smallest():
-        """Return and remove the (item, score) pair with lowest score.
-
-        If len(self) is 0, raise IndexError.
-
-        To be cleaer, this is the lowest score among the N best-scoring
-        seen so far.  This is most useful if the capacity of the NBest
-        object is never exceeded, in which case  pop_smallest() allows
-        using the object as an ordinary smallest-in-first-out priority
-        queue.
-        """
-
-    def __len__():
-        """Return the number of (item, score) pairs currently known.
-
-        This is N (the value passed to the constructor), unless .add()
-        has been called fewer than N times.
-        """
-
-    def capacity():
-        """Return the maximum number of (item, score) pairs.
-
-        This is N (the value passed to the constructor).
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,32 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IPipelineElement(Interface):
-
-    def process(source):
-        """Provide a text processing step.
-
-        Process a source sequence of words into a result sequence.
-        """
-
-    def processGlob(source):
-        """Process, passing through globbing metacharaters.
-
-        This is an optional method; if it is not used, process() is used.
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,42 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Factory interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IPipelineElementFactory(Interface):
-    """Class for creating pipeline elements by name"""
-
-    def registerFactory(group, name, factory):
-        """Registers a pipeline factory by name and element group.
-
-        Each name can be registered only once for a given group. Duplicate
-        registrations will raise a ValueError
-        """
-
-    def getFactoryGroups():
-        """Returns a sorted list of element group names
-        """
-
-    def getFactoryNames(group):
-        """Returns a sorted list of registered pipeline factory names
-        in the specified element group
-        """
-
-    def instantiate(group, name):
-        """Instantiates a pipeline element by group and name. If name is not
-        registered raise a KeyError.
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,54 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query Parser Interface.
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IQueryParser(Interface):
-    """Interface for Query Parsers."""
-
-    def parseQuery(query):
-        """Parse a query string.
-
-        Return a parse tree (which implements IQueryParseTree).
-
-        Some of the query terms may be ignored because they are
-        stopwords; use getIgnored() to find out which terms were
-        ignored.  But if the entire query consists only of stop words,
-        or of stopwords and one or more negated terms, an exception is
-        raised.
-
-        May raise ParseTree.ParseError.
-        """
-
-    def getIgnored():
-        """Return the list of ignored terms.
-
-        Return the list of terms that were ignored by the most recent
-        call to parseQuery() because they were stopwords.
-
-        If parseQuery() was never called this returns None.
-        """
-
-    def parseQueryEx(query):
-        """Parse a query string.
-
-        Return a tuple (tree, ignored) where 'tree' is the parse tree
-        as returned by parseQuery(), and 'ignored' is a list of
-        ignored terms as returned by getIgnored().
-
-        May raise ParseTree.ParseError.
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,53 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query Parser Tree Interface.
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IQueryParseTree(Interface):
-    """Interface for parse trees returned by parseQuery()."""
-
-    def nodeType():
-        """Return the node type.
-
-        This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
-        """
-
-    def getValue():
-        """Return a node-type specific value.
-
-        For node type:    Return:
-        'AND'             a list of parse trees
-        'OR'              a list of parse trees
-        'NOT'             a parse tree
-        'ATOM'            a string (representing a single search term)
-        'PHRASE'          a string (representing a search phrase)
-        'GLOB'            a string (representing a pattern, e.g. "foo*")
-        """
-
-    def terms():
-        """Return a list of all terms in this node, excluding NOT subtrees."""
-
-    def executeQuery(index):
-        """Execute the query represented by this node against the index.
-
-        The index argument must implement the IIndex interface.
-
-        Return an IIBucket or IIBTree mapping document ids to scores
-        (higher scores mean better results).
-
-        May raise ParseTree.QueryError.
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,31 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Interfaces related to text indexing and searching.
-
-$Id: interfaces.py 25353 2004-06-11 15:22:11Z gintautasm $
-"""
-from zope.interface import Interface
-
-class ISearchableText(Interface):
-    """Interface that text-indexable objects should implement."""
-
-    def getSearchableText():
-        """Return a sequence of unicode strings to be indexed.
-
-        Each unicode string in the returned sequence will be run
-        through the splitter pipeline; the combined stream of words
-        coming out of the pipeline will be indexed.
-
-        returning None indicates the object should not be indexed
-        """

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,24 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Splitter interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class ISplitter(Interface):
-    """A splitter."""
-
-    def process(text):
-        """Run the splitter over the input text, returning a list of terms."""

Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py (from rev 28576, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,255 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Basic interfaces shared between different types of index.
+
+$Id$
+"""
+from zope.interface import Interface
+
+
+class IInjection(Interface):
+    """Interface for injecting documents into an index."""
+
+    def index_doc(docid, value):
+        """Add a document to the index.
+
+        docid: int, identifying the document
+
+        value: the value to be indexed
+
+        return: None
+
+        This can also be used to reindex documents.
+        """
+
+    def unindex_doc(docid):
+        """Remove a document from the index.
+
+        docid: int, identifying the document
+
+        return: None
+
+        This call is a no-op if the docid isn't in the index, however,
+        after this call, the index should have no references to the docid.
+        """
+
+    def clear():
+        """Unindex all documents indexed by the index
+        """
+
+class IIndexSearch(Interface):
+
+    def apply(query):
+        """Apply an index to the given query
+
+        The type of the query is index specific.
+
+        TODO
+            This is somewhat problematic. It means that application
+            code that calls apply has to be aware of the
+            expected query type. This isn't too much of a problem now,
+            as we have no more general query language nor do we have
+            any sort of automatic query-form generation.
+
+            It would be nice to have a system later for having
+            query-form generation or, perhaps, some sort of query
+            language. At that point, we'll need some sort of way to
+            determine query types, presumably through introspection of
+            the index objects.
+
+        A result is returned that is:
+
+        - An IIBTree or an IIBucket mapping document ids to integer
+          scores for document ids of documents that match the query,
+
+        - An IISet or IITreeSet containing document ids of documents
+          that match the query, or
+
+        - None, indicating that the index could not use the query and
+          that the result should have no impact on determining a final
+          result.
+
+        """
+
+class IQuerying(Interface):
+    """An index that can be queried by some text and returns a result set."""
+
+    def query(querytext, start=0, count=None):
+        """Execute a query.
+
+        querytext: unicode, the query expression
+        start: the first result to return (0-based)
+        count: the maximum number of results to return (default: all)
+        return: ([(docid, rank), ...], total)
+
+        The return value is a tuple:
+            matches: list of (int, float) tuples, docid and rank
+            total: int, the total number of matches
+
+        The matches list represents the requested batch.  The ranks
+        are floats between 0 and 1 (inclusive).
+        """
+
+class IStatistics(Interface):
+    """An index that provides statistical information about itself."""
+
+    def documentCount():
+        """Return the number of documents currently indexed."""
+
+    def wordCount():
+        """Return the number of words currently indexed."""
+
+
+class IExtendedQuerying(Interface):
+    """An index that supports advanced search setups."""
+
+    def search(term):
+        """Execute a search on a single term given as a string.
+
+        Return an IIBTree mapping docid to score, or None if all docs
+        match due to the lexicon returning no wids for the term (e.g.,
+        if the term is entirely composed of stopwords).
+        """
+
+    def search_phrase(phrase):
+        """Execute a search on a phrase given as a string.
+
+        Return an IIBTree mapping docid to score.
+        """
+
+    def search_glob(pattern):
+        """Execute a pattern search.
+
+        The pattern represents a set of words by using * and ?.  For
+        example, "foo*" represents the set of all words in the lexicon
+        starting with "foo".
+
+        Return an IIBTree mapping docid to score.
+        """
+
+    def query_weight(terms):
+        """Return the weight for a set of query terms.
+
+        'terms' is a sequence of all terms included in the query,
+        although not terms with a not.  If a term appears more than
+        once in a query, it should appear more than once in terms.
+
+        Nothing is defined about what "weight" means, beyond that the
+        result is an upper bound on document scores returned for the
+        query.
+        """
+
+class IKeywordQuerying(Interface):
+    """Query over a set of keywords, separated by white space."""
+
+    def search(query, operator='and'):
+        """Execute a search given by 'query' as a list/tuple of
+           (unicode) strings against the index. 'operator' can be either
+           'and' or 'or' to search for all keywords or any keyword. 
+
+           Return an IISet of docids
+        """
+
+class ITopicQuerying(Interface):
+    """Query over topics, separated by white space."""
+
+    def search(query, operator='and'):
+        """Execute a search given by 'query' as a list/tuple of filter ids.
+          'operator' can be 'and' or 'or' to search for matches in all
+           or any filter.
+
+           Return an IISet of docids
+        """
+
+class ISimpleQuery(Interface):
+    """A simple query interface."""
+
+    def query(term, start=0, count=None):
+        """Search for the given term, return a sequence of docids"""
+
+
+class ITopicFilteredSet(Interface):
+    """Interface for filtered sets used by topic indexes."""
+    
+    def clear():
+        """Remove all entries from the index."""
+
+    def index_doc(docid, context):
+        """Add an object's info to the index."""
+
+    def unindex_doc(docid):
+        """Remove an object with id 'docid' from the index."""
+
+    def getId():
+        """Return the id of the filter itself."""
+
+    def setExpression(expr):
+        """Set the filter expression, e.g. 'context.meta_type=='...'"""
+        
+    def getExpression():
+        """Return the filter expression."""
+
+    def getIds():
+        """Return an IISet of docids."""
+
+
+class INBest(Interface):
+    """Interface for an N-Best chooser."""
+
+    def add(item, score):
+        """Record that item 'item' has score 'score'.  No return value.
+
+        The N best-scoring items are remembered, where N was passed to
+        the constructor.  'item' can be anything.  'score' should be
+        a number, and larger numbers are considered better.
+        """
+
+    def addmany(sequence):
+        """Like "for item, score in sequence: self.add(item, score)".
+
+        This is simply faster than calling add() len(seq) times.
+        """
+
+    def getbest():
+        """Return the (at most) N best-scoring items as a sequence.
+
+        The return value is a sequence of 2-tuples, (item, score), with
+        the largest score first.  If .add() has been called fewer than
+        N times, this sequence will contain fewer than N pairs.
+        """
+
+    def pop_smallest():
+        """Return and remove the (item, score) pair with lowest score.
+
+        If len(self) is 0, raise IndexError.
+
+        To be clearer, this is the lowest score among the N best-scoring
+        seen so far.  This is most useful if the capacity of the NBest
+        object is never exceeded, in which case  pop_smallest() allows
+        using the object as an ordinary smallest-in-first-out priority
+        queue.
+        """
+
+    def __len__():
+        """Return the number of (item, score) pairs currently known.
+
+        This is N (the value passed to the constructor), unless .add()
+        has been called fewer than N times.
+        """
+
+    def capacity():
+        """Return the maximum number of (item, score) pairs.
+
+        This is N (the value passed to the constructor).
+        """

Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py (from rev 28575, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py	2004-12-06 19:11:35 UTC (rev 28575)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,79 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""NBest
+
+An NBest object remembers the N best-scoring items ever passed to its
+.add(item, score) method.  If .add() is called M times, the worst-case
+number of comparisons performed overall is M * log2(N).
+
+$Id$
+"""
+
+from bisect import bisect_left as bisect
+
+from zope.index.interfaces import INBest
+from zope.interface import implements
+
+class NBest(object):
+    implements(INBest)
+
+    def __init__(self, N):
+        "Build an NBest object to remember the N best-scoring objects."
+
+        if N < 1:
+            raise ValueError("NBest() argument must be at least 1")
+        self._capacity = N
+
+        # This does a very simple thing with sorted lists.  For large
+        # N, a min-heap can be unboundedly better in terms of data
+        # movement time.
+        self._scores = []
+        self._items = []
+
+    def __len__(self):
+        return len(self._scores)
+
+    def capacity(self):
+        return self._capacity
+
+    def add(self, item, score):
+        self.addmany([(item, score)])
+
+    def addmany(self, sequence):
+        scores, items, capacity = self._scores, self._items, self._capacity
+        n = len(scores)
+        for item, score in sequence:
+            # When we're in steady-state, the usual case is that we're filled
+            # to capacity, and that an incoming item is worse than any of
+            # the best-seen so far.
+            if n >= capacity and score <= scores[0]:
+                continue
+            i = bisect(scores, score)
+            scores.insert(i, score)
+            items.insert(i, item)
+            if n == capacity:
+                del items[0], scores[0]
+            else:
+                n += 1
+        assert n == len(scores)
+
+    def getbest(self):
+        result = zip(self._items, self._scores)
+        result.reverse()
+        return result
+
+    def pop_smallest(self):
+        if self._scores:
+            return self._items.pop(0), self._scores.pop(0)
+        raise IndexError("pop_smallest() called on empty NBest object")

Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py (from rev 28575, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py	2004-12-06 19:11:35 UTC (rev 28575)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,100 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""N-Best index tests
+
+$Id$
+"""
+from unittest import TestCase, main, makeSuite
+
+from zope.index.nbest import NBest
+
+class NBestTest(TestCase):
+
+    def testConstructor(self):
+        self.assertRaises(ValueError, NBest, 0)
+        self.assertRaises(ValueError, NBest, -1)
+
+        for n in range(1, 11):
+            nb = NBest(n)
+            self.assertEqual(len(nb), 0)
+            self.assertEqual(nb.capacity(), n)
+
+    def testOne(self):
+        nb = NBest(1)
+        nb.add('a', 0)
+        self.assertEqual(nb.getbest(), [('a', 0)])
+
+        nb.add('b', 1)
+        self.assertEqual(len(nb), 1)
+        self.assertEqual(nb.capacity(), 1)
+        self.assertEqual(nb.getbest(), [('b', 1)])
+
+        nb.add('c', -1)
+        self.assertEqual(len(nb), 1)
+        self.assertEqual(nb.capacity(), 1)
+        self.assertEqual(nb.getbest(), [('b', 1)])
+
+        nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
+        self.assertEqual(len(nb), 1)
+        self.assertEqual(nb.capacity(), 1)
+        self.assertEqual(nb.getbest(), [('f', 5)])
+
+    def testMany(self):
+        import random
+        inputs = [(-i, i) for i in range(50)]
+
+        reversed_inputs = inputs[:]
+        reversed_inputs.reverse()
+
+        # Test the N-best for a variety of n (1, 6, 11, ... 46).
+        for n in range(1, len(inputs)+1, 5):
+            expected = inputs[-n:]
+            expected.reverse()
+
+            random_inputs = inputs[:]
+            random.shuffle(random_inputs)
+
+            for source in inputs, reversed_inputs, random_inputs:
+                # Try feeding them one at a time.
+                nb = NBest(n)
+                for item, score in source:
+                    nb.add(item, score)
+                self.assertEqual(len(nb), n)
+                self.assertEqual(nb.capacity(), n)
+                self.assertEqual(nb.getbest(), expected)
+
+                # And again in one gulp.
+                nb = NBest(n)
+                nb.addmany(source)
+                self.assertEqual(len(nb), n)
+                self.assertEqual(nb.capacity(), n)
+                self.assertEqual(nb.getbest(), expected)
+
+                for i in range(1, n+1):
+                    self.assertEqual(nb.pop_smallest(), expected[-i])
+                self.assertRaises(IndexError, nb.pop_smallest)
+
+    def testAllSameScore(self):
+        inputs = [(i, 0) for i in range(10)]
+        for n in range(1, 12):
+            nb = NBest(n)
+            nb.addmany(inputs)
+            outputs = nb.getbest()
+            self.assertEqual(outputs, inputs[:len(outputs)])
+
+def test_suite():
+    return makeSuite(NBestTest)
+
+if __name__=='__main__':
+    main(defaultTest='test_suite')

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -19,10 +19,8 @@
 
 from zope.interface import implements
 
-from zope.index.interfaces.splitter import ISplitter
-from zope.index.text.pipelinefactory import element_factory
+from zope.index.text.interfaces import ISplitter
 
-
 class HTMLWordSplitter(object):
 
     implements(ISplitter)
@@ -45,10 +43,6 @@
             text = re.sub(pat, " ", text)
         return re.findall(wordpat, text)
 
-element_factory.registerFactory('Word Splitter',
-                                'HTML aware splitter',
-                                HTMLWordSplitter)
-
 if __name__ == "__main__":
     import sys
     splitter = HTMLWordSplitter()

Added: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,168 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""Text-indexing interfaces
+
+$Id$
+"""
+from zope.interface import Interface
+
+class ILexicon(Interface):
+    """Object responsible for converting text to word identifiers."""
+
+    def termToWordIds(text):
+        """Return a sequence of ids of the words parsed from the text.
+
+        The input text may be either a string or a list of strings.
+
+        Parse the text as if it consists of search terms, and skip words
+        that aren't in the lexicon.
+        """
+
+    def sourceToWordIds(text):
+        """Return a sequence of ids of the words parsed from the text.
+
+        The input text may be either a string or a list of strings.
+
+        Parse the text as if it comes from a source document, and
+        create new word ids for words that aren't (yet) in the
+        lexicon.
+        """
+
+    def globToWordIds(pattern):
+        """Return a sequence of ids of words matching the pattern.
+
+        The argument should be a single word using globbing syntax,
+        e.g. 'foo*' meaning anything starting with 'foo'.
+
+        Return the wids for all words in the lexicon that match the
+        pattern.
+        """
+
+    def wordCount():
+        """Return the number of unique terms in the lexicon."""
+
+    def get_word(wid):
+        """Return the word for the given word id.
+
+        Raise KeyError if the word id is not in the lexicon.
+        """
+
+    def get_wid(word):
+        """Return the word id for the given word.
+
+        Return 0 if the word is not in the lexicon.
+        """
+
+    def parseTerms(text):
+        """Pass the text through the pipeline.
+
+        Return a list of words, normalized by the pipeline
+        (e.g. stopwords removed, case normalized etc.).
+        """
+
+    def isGlob(word):
+        """Return true if the word is a globbing pattern.
+
+        The word should be one of the words returned by parseTerms().
+        """
+
+class IQueryParser(Interface):
+    """Interface for Query Parsers."""
+
+    def parseQuery(query):
+        """Parse a query string.
+
+        Return a parse tree (which implements IQueryParseTree).
+
+        Some of the query terms may be ignored because they are
+        stopwords; use getIgnored() to find out which terms were
+        ignored.  But if the entire query consists only of stop words,
+        or of stopwords and one or more negated terms, an exception is
+        raised.
+
+        May raise ParseTree.ParseError.
+        """
+
+    def getIgnored():
+        """Return the list of ignored terms.
+
+        Return the list of terms that were ignored by the most recent
+        call to parseQuery() because they were stopwords.
+
+        If parseQuery() was never called this returns None.
+        """
+
+    def parseQueryEx(query):
+        """Parse a query string.
+
+        Return a tuple (tree, ignored) where 'tree' is the parse tree
+        as returned by parseQuery(), and 'ignored' is a list of
+        ignored terms as returned by getIgnored().
+
+        May raise ParseTree.ParseError.
+        """
+
+class IQueryParseTree(Interface):
+    """Interface for parse trees returned by parseQuery()."""
+
+    def nodeType():
+        """Return the node type.
+
+        This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
+        """
+
+    def getValue():
+        """Return a node-type specific value.
+
+        For node type:    Return:
+        'AND'             a list of parse trees
+        'OR'              a list of parse trees
+        'NOT'             a parse tree
+        'ATOM'            a string (representing a single search term)
+        'PHRASE'          a string (representing a search phrase)
+        'GLOB'            a string (representing a pattern, e.g. "foo*")
+        """
+
+    def terms():
+        """Return a list of all terms in this node, excluding NOT subtrees."""
+
+    def executeQuery(index):
+        """Execute the query represented by this node against the index.
+
+        The index argument must implement the IIndex interface.
+
+        Return an IIBucket or IIBTree mapping document ids to scores
+        (higher scores mean better results).
+
+        May raise ParseTree.QueryError.
+        """
+
+class ISearchableText(Interface):
+    """Interface that text-indexable objects should implement."""
+
+    def getSearchableText():
+        """Return a sequence of unicode strings to be indexed.
+
+        Each unicode string in the returned sequence will be run
+        through the splitter pipeline; the combined stream of words
+        coming out of the pipeline will be indexed.
+
+        returning None indicates the object should not be indexed
+        """
+
+class ISplitter(Interface):
+    """A splitter."""
+
+    def process(text):
+        """Run the splitter over the input text, returning a list of terms."""


Property changes on: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
___________________________________________________________________
Name: svn:keywords
   + Id
Name: svn:eol-style
   + native

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -24,10 +24,9 @@
 
 from persistent import Persistent
 
-from zope.index.interfaces.lexicon import ILexicon
+from zope.index.text.interfaces import ILexicon
 from zope.index.text.stopdict import get_stopdict
 from zope.index.text.parsetree import QueryError
-from zope.index.text.pipelinefactory import element_factory
 
 
 class Lexicon(Persistent):
@@ -175,23 +174,11 @@
             result += self.rxGlob.findall(s)
         return result
 
-element_factory.registerFactory('Word Splitter',
-                                 'Whitespace splitter',
-                                 Splitter)
-
 class CaseNormalizer(object):
 
     def process(self, lst):
         return [w.lower() for w in lst]
 
-element_factory.registerFactory('Case Normalizer',
-                                'Case Normalizer',
-                                CaseNormalizer)
-
-element_factory.registerFactory('Stop Words',
-                                ' Don\'t remove stop words',
-                                None)
-
 class StopWordRemover(object):
 
     dict = get_stopdict().copy()
@@ -206,16 +193,8 @@
         def process(self, lst):
             return self._process(self.dict, lst)
 
-element_factory.registerFactory('Stop Words',
-                                'Remove listed stop words only',
-                                StopWordRemover)
-
 class StopWordAndSingleCharRemover(StopWordRemover):
 
     dict = get_stopdict().copy()
     for c in range(255):
         dict[chr(c)] = None
-
-element_factory.registerFactory('Stop Words',
-                                'Remove listed and single char words',
-                                StopWordAndSingleCharRemover)

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,79 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""NBest
-
-An NBest object remembers the N best-scoring items ever passed to its
-.add(item, score) method.  If .add() is called M times, the worst-case
-number of comparisons performed overall is M * log2(N).
-
-$Id$
-"""
-
-from bisect import bisect_left as bisect
-
-from zope.index.interfaces.nbest import INBest
-from zope.interface import implements
-
-class NBest(object):
-    implements(INBest)
-
-    def __init__(self, N):
-        "Build an NBest object to remember the N best-scoring objects."
-
-        if N < 1:
-            raise ValueError("NBest() argument must be at least 1")
-        self._capacity = N
-
-        # This does a very simple thing with sorted lists.  For large
-        # N, a min-heap can be unboundedly better in terms of data
-        # movement time.
-        self._scores = []
-        self._items = []
-
-    def __len__(self):
-        return len(self._scores)
-
-    def capacity(self):
-        return self._capacity
-
-    def add(self, item, score):
-        self.addmany([(item, score)])
-
-    def addmany(self, sequence):
-        scores, items, capacity = self._scores, self._items, self._capacity
-        n = len(scores)
-        for item, score in sequence:
-            # When we're in steady-state, the usual case is that we're filled
-            # to capacity, and that an incoming item is worse than any of
-            # the best-seen so far.
-            if n >= capacity and score <= scores[0]:
-                continue
-            i = bisect(scores, score)
-            scores.insert(i, score)
-            items.insert(i, item)
-            if n == capacity:
-                del items[0], scores[0]
-            else:
-                n += 1
-        assert n == len(scores)
-
-    def getbest(self):
-        result = zip(self._items, self._scores)
-        result.reverse()
-        return result
-
-    def pop_smallest(self):
-        if self._scores:
-            return self._items.pop(0), self._scores.pop(0)
-        raise IndexError("pop_smallest() called on empty NBest object")

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -17,8 +17,9 @@
 """
 from BTrees.IIBTree import difference
 
-from zope.index.interfaces.queryparsetree import IQueryParseTree
-from zope.index.text.setops import mass_weightedIntersection, mass_weightedUnion
+from zope.index.text.interfaces import IQueryParseTree
+from zope.index.text.setops import mass_weightedIntersection
+from zope.index.text.setops import mass_weightedUnion
 
 from zope.interface import implements
 

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,55 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Factory
-
-$Id$
-"""
-from zope.index.interfaces.pipelineelementfactory import IPipelineElementFactory
-from zope.interface import implements
-
-class PipelineElementFactory(object):
-
-    implements(IPipelineElementFactory)
-
-    def __init__(self):
-        self._groups = {}
-
-    def registerFactory(self, group, name, factory):
-        if self._groups.has_key(group) and \
-           self._groups[group].has_key(name):
-            raise ValueError('ZCTextIndex lexicon element "%s" '
-                             'already registered in group "%s"'
-                             % (name, group))
-
-        elements = self._groups.get(group)
-        if elements is None:
-            elements = self._groups[group] = {}
-        elements[name] = factory
-
-    def getFactoryGroups(self):
-        groups = self._groups.keys()
-        groups.sort()
-        return groups
-
-    def getFactoryNames(self, group):
-        names = self._groups[group].keys()
-        names.sort()
-        return names
-
-    def instantiate(self, group, name):
-        factory = self._groups[group][name]
-        if factory is not None:
-            return factory()
-
-element_factory = PipelineElementFactory()

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -59,7 +59,7 @@
 import re
 from zope.interface import implements
 
-from zope.index.interfaces.queryparser import IQueryParser
+from zope.index.text.interfaces import IQueryParser
 from zope.index.text import parsetree
 
 # Create unique symbols for token types.

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -15,10 +15,9 @@
 
 $Id$
 """
-from BTrees.IIBTree import \
-     IIBucket, weightedIntersection, weightedUnion
+from BTrees.IIBTree import IIBucket, weightedIntersection, weightedUnion
 
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
 
 def mass_weightedIntersection(L):
     "A list of (mapping, weight) pairs -> their weightedIntersection IIBucket."

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -43,7 +43,7 @@
     return "http://www.python.org" + p[i:]
 
 from Products.PluginIndexes.TextIndex.TextIndex import And, Or
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
 
 def main(rt):
     index = rt["index"]

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,100 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""N-Best index tests
-
-$Id$
-"""
-from unittest import TestCase, main, makeSuite
-
-from zope.index.text.nbest import NBest
-
-class NBestTest(TestCase):
-
-    def testConstructor(self):
-        self.assertRaises(ValueError, NBest, 0)
-        self.assertRaises(ValueError, NBest, -1)
-
-        for n in range(1, 11):
-            nb = NBest(n)
-            self.assertEqual(len(nb), 0)
-            self.assertEqual(nb.capacity(), n)
-
-    def testOne(self):
-        nb = NBest(1)
-        nb.add('a', 0)
-        self.assertEqual(nb.getbest(), [('a', 0)])
-
-        nb.add('b', 1)
-        self.assertEqual(len(nb), 1)
-        self.assertEqual(nb.capacity(), 1)
-        self.assertEqual(nb.getbest(), [('b', 1)])
-
-        nb.add('c', -1)
-        self.assertEqual(len(nb), 1)
-        self.assertEqual(nb.capacity(), 1)
-        self.assertEqual(nb.getbest(), [('b', 1)])
-
-        nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
-        self.assertEqual(len(nb), 1)
-        self.assertEqual(nb.capacity(), 1)
-        self.assertEqual(nb.getbest(), [('f', 5)])
-
-    def testMany(self):
-        import random
-        inputs = [(-i, i) for i in range(50)]
-
-        reversed_inputs = inputs[:]
-        reversed_inputs.reverse()
-
-        # Test the N-best for a variety of n (1, 6, 11, ... 50).
-        for n in range(1, len(inputs)+1, 5):
-            expected = inputs[-n:]
-            expected.reverse()
-
-            random_inputs = inputs[:]
-            random.shuffle(random_inputs)
-
-            for source in inputs, reversed_inputs, random_inputs:
-                # Try feeding them one at a time.
-                nb = NBest(n)
-                for item, score in source:
-                    nb.add(item, score)
-                self.assertEqual(len(nb), n)
-                self.assertEqual(nb.capacity(), n)
-                self.assertEqual(nb.getbest(), expected)
-
-                # And again in one gulp.
-                nb = NBest(n)
-                nb.addmany(source)
-                self.assertEqual(len(nb), n)
-                self.assertEqual(nb.capacity(), n)
-                self.assertEqual(nb.getbest(), expected)
-
-                for i in range(1, n+1):
-                    self.assertEqual(nb.pop_smallest(), expected[-i])
-                self.assertRaises(IndexError, nb.pop_smallest)
-
-    def testAllSameScore(self):
-        inputs = [(i, 0) for i in range(10)]
-        for n in range(1, 12):
-            nb = NBest(n)
-            nb.addmany(inputs)
-            outputs = nb.getbest()
-            self.assertEqual(outputs, inputs[:len(outputs)])
-
-def test_suite():
-    return makeSuite(NBestTest)
-
-if __name__=='__main__':
-    main(defaultTest='test_suite')

Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,53 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Factory tests
-
-$Id$
-"""
-from unittest import TestCase, main, makeSuite
-from zope.index.interfaces.pipelineelement import IPipelineElement
-from zope.index.text.pipelinefactory import PipelineElementFactory
-from zope.interface import implements
-
-class NullPipelineElement(object):
-    implements(IPipelineElement)
-
-    def process(source):
-        pass
-
-class PipelineFactoryTest(TestCase):
-
-    def setUp(self):
-        self.huey = NullPipelineElement()
-        self.dooey = NullPipelineElement()
-        self.louie = NullPipelineElement()
-        self.daffy = NullPipelineElement()
-
-    def testPipeline(self):
-        pf = PipelineElementFactory()
-        pf.registerFactory('donald', 'huey', self.huey)
-        pf.registerFactory('donald', 'dooey',  self.dooey)
-        pf.registerFactory('donald', 'louie', self.louie)
-        pf.registerFactory('looney', 'daffy', self.daffy)
-        self.assertRaises(ValueError, pf.registerFactory,'donald',  'huey',
-                          self.huey)
-        self.assertEqual(pf.getFactoryGroups(), ['donald', 'looney'])
-        self.assertEqual(pf.getFactoryNames('donald'),
-                         ['dooey', 'huey', 'louie'])
-
-def test_suite():
-    return makeSuite(PipelineFactoryTest)
-
-if __name__=='__main__':
-    main(defaultTest='test_suite')

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -19,8 +19,8 @@
 
 from zope.interface.verify import verifyClass
 
-from zope.index.interfaces.queryparser import IQueryParser
-from zope.index.interfaces.queryparsetree import IQueryParseTree
+from zope.index.text.interfaces import IQueryParser
+from zope.index.text.interfaces import IQueryParseTree
 
 from zope.index.text.queryparser import QueryParser
 from zope.index.text.parsetree import ParseError, ParseTreeNode

Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py	2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py	2004-12-07 18:15:57 UTC (rev 28577)
@@ -25,10 +25,9 @@
 from zope.index.text.lexicon import Lexicon
 from zope.index.text.lexicon import Splitter, CaseNormalizer, StopWordRemover
 from zope.index.text.queryparser import QueryParser
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
 
-from zope.index.interfaces import \
-     IInjection, IQuerying, IStatistics
+from zope.index.interfaces import IInjection, IQuerying, IStatistics
 
 class TextIndexWrapper(Persistent):
 



More information about the Zope3-Checkins mailing list