[Zope3-checkins]
SVN: Zope3/branches/jim-index-restructure-2004-12/src/zope/
- Removed the unused pipeline-element framework. WHUI
Jim Fulton
jim at zope.com
Tue Dec 7 13:15:58 EST 2004
Log message for revision 28577:
- Removed the unused pipeline-element framework. WHUI
- Moved the nbest code out of text, as it should generally be
used by applications that call indexes, not by the indexes
themselves.
- Moved the text-indexing interfaces into text/interfaces.py.
- Converted the interfaces package into a module
Changed:
U Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt
U Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py
A Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py
D Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py
U Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py
-=-
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/catalog/README.txt 2004-12-07 18:15:57 UTC (rev 28577)
@@ -189,20 +189,20 @@
searchableText.
For text indexes, one generally uses
-`zope.index.interfaces.searchabletext.ISearchableText`,
+`zope.index.text.interfaces.ISearchableText`,
`getSearchableText` and True.
>>> print http(r"""
... POST /++etc++site/default/Catalog/+/AddTextIndex%3D HTTP/1.1
... Authorization: Basic bWdyOm1ncnB3
- ... Content-Length: 1003
+ ... Content-Length: 1008
... Content-Type: multipart/form-data; boundary=---------------------------12609588153518590761493918424
... Referer: http://localhost:8081/++etc++site/default/Catalog/+/AddTextIndex=
...
... -----------------------------12609588153518590761493918424
... Content-Disposition: form-data; name="field.interface"
...
- ... zope.index.interfaces.searchabletext.ISearchableText
+ ... zope.index.text.interfaces.ISearchableText
... -----------------------------12609588153518590761493918424
... Content-Disposition: form-data; name="field.interface-empty-marker"
...
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/tests.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -16,7 +16,7 @@
$Id$
"""
-from zope.index.interfaces.searchabletext import ISearchableText
+from zope.index.text.interfaces import ISearchableText
from zope.app.tests import ztapi
from zope.app.tests.placelesssetup import PlacelessSetup
from zope.app.zptpage.interfaces import IZPTPage
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/app/zptpage/textindex/zptpage.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -17,7 +17,7 @@
from zope.interface import implements
from zope.app.zptpage.interfaces import IZPTPage
-from zope.index.interfaces.searchabletext import ISearchableText
+from zope.index.text.interfaces import ISearchableText
import re
tag = re.compile(r"<[^>]+>")
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,204 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Basic interfaces shared between different types of index.
-
-$Id$
-"""
-from zope.interface import Interface
-
-
-class IInjection(Interface):
- """Interface for injecting documents into an index."""
-
- def index_doc(docid, value):
- """Add a document to the index.
-
- docid: int, identifying the document
-
- value: the value to be indexed
-
- return: None
-
- This can also be used to reindex documents.
- """
-
- def unindex_doc(docid):
- """Remove a document from the index.
-
- docid: int, identifying the document
-
- return: None
-
- This call is a no-op if the docid isn't in the index, however,
- after this call, the index should have no references to the docid.
- """
-
- def clear():
- """Unindex all documents indexed by the index
- """
-
-class IIndexSearch(Interface):
-
- def apply(query):
- """Apply an index to the given query
-
- The type if the query is index specific.
-
- TODO
- This is somewhat problemetic. It means that application
- code that calls apply has to be aware of the
- expected query type. This isn't too much of a problem now,
- as we have no more general query language nor do we have
- any sort of automatic query-form generation.
-
- It would be nice to have a system later for having
- query-form generation or, perhaps, sme sort of query
- language. At that point, we'll need some sort of way to
- determine query types, presumably through introspection of
- the index objects.
-
- A result is returned that is:
-
- - An IIBTree or an IIBucket mapping document ids to integer
- scores for document ids of documents that match the query,
-
- - An IISet or IITreeSet containing document ids of documents
- that match the query, or
-
- - None, indicating that the index could not use the query and
- that the result should have no impact on determining a final
- result.
-
- """
-
-class IQuerying(Interface):
- """An index that can be queried by some text and returns a result set."""
-
- def query(querytext, start=0, count=None):
- """Execute a query.
-
- querytext: unicode, the query expression
- start: the first result to return (0-based)
- count: the maximum number of results to return (default: all)
- return: ([(docid, rank), ...], total)
-
- The return value is a tuple:
- matches: list of (int, float) tuples, docid and rank
- total: int, the total number of matches
-
- The matches list represents the requested batch. The ranks
- are floats between 0 and 1 (inclusive).
- """
-
-class IStatistics(Interface):
- """An index that provides statistical information about itself."""
-
- def documentCount():
- """Return the number of documents currently indexed."""
-
- def wordCount():
- """Return the number of words currently indexed."""
-
-
-class IExtendedQuerying(Interface):
- """An index that supports advanced search setups."""
-
- def search(term):
- """Execute a search on a single term given as a string.
-
- Return an IIBTree mapping docid to score, or None if all docs
- match due to the lexicon returning no wids for the term (e.g.,
- if the term is entirely composed of stopwords).
- """
-
- def search_phrase(phrase):
- """Execute a search on a phrase given as a string.
-
- Return an IIBtree mapping docid to score.
- """
-
- def search_glob(pattern):
- """Execute a pattern search.
-
- The pattern represents a set of words by using * and ?. For
- example, "foo*" represents the set of all words in the lexicon
- starting with "foo".
-
- Return an IIBTree mapping docid to score.
- """
-
- def query_weight(terms):
- """Return the weight for a set of query terms.
-
- 'terms' is a sequence of all terms included in the query,
- although not terms with a not. If a term appears more than
- once in a query, it should appear more than once in terms.
-
- Nothing is defined about what "weight" means, beyond that the
- result is an upper bound on document scores returned for the
- query.
- """
-
-class IKeywordQuerying(Interface):
- """Query over a set of keywords, seperated by white space."""
-
- def search(query, operator='and'):
- """Execute a search given by 'query' as a list/tuple of
- (unicode) strings against the index. 'operator' can be either
- 'and' or 'or' to search for all keywords or any keyword.
-
- Return an IISet of docids
- """
-
-class ITopicQuerying(Interface):
- """Query over topics, seperated by white space."""
-
- def search(query, operator='and'):
- """Execute a search given by 'query' as a list/tuple of filter ids.
- 'operator' can be 'and' or 'or' to search for matches in all
- or any filter.
-
- Return an IISet of docids
- """
-
-class ISimpleQuery(Interface):
- """A simple query interface."""
-
- def query(term, start=0, count=None):
- """Search for the given term, return a sequence of docids"""
-
-
-class ITopicFilteredSet(Interface):
- """Interface for filtered sets used by topic indexes."""
-
- def clear():
- """Remove all entries from the index."""
-
- def index_doc(docid, context):
- """Add an object's info to the index."""
-
- def unindex_doc(docid):
- """Remove an object with id 'docid' from the index."""
-
- def getId():
- """Return the id of the filter itself."""
-
- def setExpression(expr):
- """Set the filter expression, e.g. 'context.meta_type=='...'"""
-
- def getExpression():
- """Return the filter expression."""
-
- def getIds():
- """Return an IISet of docids."""
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/lexicon.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,78 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Lexicon interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class ILexicon(Interface):
- """Object responsible for converting text to word identifiers."""
-
- def termToWordIds(text):
- """Return a sequence of ids of the words parsed from the text.
-
- The input text may be either a string or a list of strings.
-
- Parse the text as if they are search terms, and skips words
- that aren't in the lexicon.
- """
-
- def sourceToWordIds(text):
- """Return a sequence of ids of the words parsed from the text.
-
- The input text may be either a string or a list of strings.
-
- Parse the text as if they come from a source document, and
- creates new word ids for words that aren't (yet) in the
- lexicon.
- """
-
- def globToWordIds(pattern):
- """Return a sequence of ids of words matching the pattern.
-
- The argument should be a single word using globbing syntax,
- e.g. 'foo*' meaning anything starting with 'foo'.
-
- Return the wids for all words in the lexicon that match the
- pattern.
- """
-
- def wordCount():
- """Return the number of unique terms in the lexicon."""
-
- def get_word(wid):
- """Return the word for the given word id.
-
- Raise KeyError if the word id is not in the lexicon.
- """
-
- def get_wid(word):
- """Return the wird id for the given word.
-
- Return 0 of the word is not in the lexicon.
- """
-
- def parseTerms(text):
- """Pass the text through the pipeline.
-
- Return a list of words, normalized by the pipeline
- (e.g. stopwords removed, case normalized etc.).
- """
-
- def isGlob(word):
- """Return true if the word is a globbing pattern.
-
- The word should be one of the words returned by parseTerm().
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/nbest.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,74 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""NBest Interface.
-
-An NBest object remembers the N best-scoring items ever passed to its
-.add(item, score) method. If .add() is called M times, the worst-case
-number of comparisons performed overall is M * log2(N).
-
-$Id$
-"""
-
-
-from zope.interface import Interface
-
-class INBest(Interface):
- """Interface for an N-Best chooser."""
-
- def add(item, score):
- """Record that item 'item' has score 'score'. No return value.
-
- The N best-scoring items are remembered, where N was passed to
- the constructor. 'item' can by anything. 'score' should be
- a number, and larger numbers are considered better.
- """
-
- def addmany(sequence):
- """Like "for item, score in sequence: self.add(item, score)".
-
- This is simply faster than calling add() len(seq) times.
- """
-
- def getbest():
- """Return the (at most) N best-scoring items as a sequence.
-
- The return value is a sequence of 2-tuples, (item, score), with
- the largest score first. If .add() has been called fewer than
- N times, this sequence will contain fewer than N pairs.
- """
-
- def pop_smallest():
- """Return and remove the (item, score) pair with lowest score.
-
- If len(self) is 0, raise IndexError.
-
- To be cleaer, this is the lowest score among the N best-scoring
- seen so far. This is most useful if the capacity of the NBest
- object is never exceeded, in which case pop_smallest() allows
- using the object as an ordinary smallest-in-first-out priority
- queue.
- """
-
- def __len__():
- """Return the number of (item, score) pairs currently known.
-
- This is N (the value passed to the constructor), unless .add()
- has been called fewer than N times.
- """
-
- def capacity():
- """Return the maximum number of (item, score) pairs.
-
- This is N (the value passed to the constructor).
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelement.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,32 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IPipelineElement(Interface):
-
- def process(source):
- """Provide a text processing step.
-
- Process a source sequence of words into a result sequence.
- """
-
- def processGlob(source):
- """Process, passing through globbing metacharaters.
-
- This is an optional method; if it is not used, process() is used.
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/pipelineelementfactory.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,42 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Factory interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IPipelineElementFactory(Interface):
- """Class for creating pipeline elements by name"""
-
- def registerFactory(group, name, factory):
- """Registers a pipeline factory by name and element group.
-
- Each name can be registered only once for a given group. Duplicate
- registrations will raise a ValueError
- """
-
- def getFactoryGroups():
- """Returns a sorted list of element group names
- """
-
- def getFactoryNames(group):
- """Returns a sorted list of registered pipeline factory names
- in the specified element group
- """
-
- def instantiate(group, name):
- """Instantiates a pipeline element by group and name. If name is not
- registered raise a KeyError.
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparser.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,54 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query Parser Interface.
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IQueryParser(Interface):
- """Interface for Query Parsers."""
-
- def parseQuery(query):
- """Parse a query string.
-
- Return a parse tree (which implements IQueryParseTree).
-
- Some of the query terms may be ignored because they are
- stopwords; use getIgnored() to find out which terms were
- ignored. But if the entire query consists only of stop words,
- or of stopwords and one or more negated terms, an exception is
- raised.
-
- May raise ParseTree.ParseError.
- """
-
- def getIgnored():
- """Return the list of ignored terms.
-
- Return the list of terms that were ignored by the most recent
- call to parseQuery() because they were stopwords.
-
- If parseQuery() was never called this returns None.
- """
-
- def parseQueryEx(query):
- """Parse a query string.
-
- Return a tuple (tree, ignored) where 'tree' is the parse tree
- as returned by parseQuery(), and 'ignored' is a list of
- ignored terms as returned by getIgnored().
-
- May raise ParseTree.ParseError.
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/queryparsetree.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,53 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Query Parser Tree Interface.
-
-$Id$
-"""
-from zope.interface import Interface
-
-class IQueryParseTree(Interface):
- """Interface for parse trees returned by parseQuery()."""
-
- def nodeType():
- """Return the node type.
-
- This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
- """
-
- def getValue():
- """Return a node-type specific value.
-
- For node type: Return:
- 'AND' a list of parse trees
- 'OR' a list of parse trees
- 'NOT' a parse tree
- 'ATOM' a string (representing a single search term)
- 'PHRASE' a string (representing a search phrase)
- 'GLOB' a string (representing a pattern, e.g. "foo*")
- """
-
- def terms():
- """Return a list of all terms in this node, excluding NOT subtrees."""
-
- def executeQuery(index):
- """Execute the query represented by this node against the index.
-
- The index argument must implement the IIndex interface.
-
- Return an IIBucket or IIBTree mapping document ids to scores
- (higher scores mean better results).
-
- May raise ParseTree.QueryError.
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/searchabletext.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,31 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""Interfaces related to text indexing and searching.
-
-$Id: interfaces.py 25353 2004-06-11 15:22:11Z gintautasm $
-"""
-from zope.interface import Interface
-
-class ISearchableText(Interface):
- """Interface that text-indexable objects should implement."""
-
- def getSearchableText():
- """Return a sequence of unicode strings to be indexed.
-
- Each unicode string in the returned sequence will be run
- through the splitter pipeline; the combined stream of words
- coming out of the pipeline will be indexed.
-
- returning None indicates the object should not be indexed
- """
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/splitter.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,24 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Splitter interface
-
-$Id$
-"""
-from zope.interface import Interface
-
-class ISplitter(Interface):
- """A splitter."""
-
- def process(text):
- """Run the splitter over the input text, returning a list of terms."""
Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py (from rev 28576, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces/__init__.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/interfaces.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,255 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Basic interfaces shared between different types of index.
+
+$Id$
+"""
+from zope.interface import Interface
+
+
+class IInjection(Interface):
+ """Interface for injecting documents into an index."""
+
+ def index_doc(docid, value):
+ """Add a document to the index.
+
+ docid: int, identifying the document
+
+ value: the value to be indexed
+
+ return: None
+
+ This can also be used to reindex documents.
+ """
+
+ def unindex_doc(docid):
+ """Remove a document from the index.
+
+ docid: int, identifying the document
+
+ return: None
+
+ This call is a no-op if the docid isn't in the index, however,
+ after this call, the index should have no references to the docid.
+ """
+
+ def clear():
+ """Unindex all documents indexed by the index
+ """
+
+class IIndexSearch(Interface):
+
+ def apply(query):
+ """Apply an index to the given query
+
+ The type of the query is index specific.
+
+ TODO
+ This is somewhat problematic. It means that application
+ code that calls apply has to be aware of the
+ expected query type. This isn't too much of a problem now,
+ as we have no more general query language nor do we have
+ any sort of automatic query-form generation.
+
+ It would be nice to have a system later for having
+ query-form generation or, perhaps, some sort of query
+ language. At that point, we'll need some sort of way to
+ determine query types, presumably through introspection of
+ the index objects.
+
+ A result is returned that is:
+
+ - An IIBTree or an IIBucket mapping document ids to integer
+ scores for document ids of documents that match the query,
+
+ - An IISet or IITreeSet containing document ids of documents
+ that match the query, or
+
+ - None, indicating that the index could not use the query and
+ that the result should have no impact on determining a final
+ result.
+
+ """
+
+class IQuerying(Interface):
+ """An index that can be queried by some text and returns a result set."""
+
+ def query(querytext, start=0, count=None):
+ """Execute a query.
+
+ querytext: unicode, the query expression
+ start: the first result to return (0-based)
+ count: the maximum number of results to return (default: all)
+ return: ([(docid, rank), ...], total)
+
+ The return value is a tuple:
+ matches: list of (int, float) tuples, docid and rank
+ total: int, the total number of matches
+
+ The matches list represents the requested batch. The ranks
+ are floats between 0 and 1 (inclusive).
+ """
+
+class IStatistics(Interface):
+ """An index that provides statistical information about itself."""
+
+ def documentCount():
+ """Return the number of documents currently indexed."""
+
+ def wordCount():
+ """Return the number of words currently indexed."""
+
+
+class IExtendedQuerying(Interface):
+ """An index that supports advanced search setups."""
+
+ def search(term):
+ """Execute a search on a single term given as a string.
+
+ Return an IIBTree mapping docid to score, or None if all docs
+ match due to the lexicon returning no wids for the term (e.g.,
+ if the term is entirely composed of stopwords).
+ """
+
+ def search_phrase(phrase):
+ """Execute a search on a phrase given as a string.
+
+ Return an IIBTree mapping docid to score.
+ """
+
+ def search_glob(pattern):
+ """Execute a pattern search.
+
+ The pattern represents a set of words by using * and ?. For
+ example, "foo*" represents the set of all words in the lexicon
+ starting with "foo".
+
+ Return an IIBTree mapping docid to score.
+ """
+
+ def query_weight(terms):
+ """Return the weight for a set of query terms.
+
+ 'terms' is a sequence of all terms included in the query,
+ although not terms with a not. If a term appears more than
+ once in a query, it should appear more than once in terms.
+
+ Nothing is defined about what "weight" means, beyond that the
+ result is an upper bound on document scores returned for the
+ query.
+ """
+
+class IKeywordQuerying(Interface):
+ """Query over a set of keywords, separated by white space."""
+
+ def search(query, operator='and'):
+ """Execute a search given by 'query' as a list/tuple of
+ (unicode) strings against the index. 'operator' can be either
+ 'and' or 'or' to search for all keywords or any keyword.
+
+ Return an IISet of docids
+ """
+
+class ITopicQuerying(Interface):
+ """Query over topics, separated by white space."""
+
+ def search(query, operator='and'):
+ """Execute a search given by 'query' as a list/tuple of filter ids.
+ 'operator' can be 'and' or 'or' to search for matches in all
+ or any filter.
+
+ Return an IISet of docids
+ """
+
+class ISimpleQuery(Interface):
+ """A simple query interface."""
+
+ def query(term, start=0, count=None):
+ """Search for the given term, return a sequence of docids"""
+
+
+class ITopicFilteredSet(Interface):
+ """Interface for filtered sets used by topic indexes."""
+
+ def clear():
+ """Remove all entries from the index."""
+
+ def index_doc(docid, context):
+ """Add an object's info to the index."""
+
+ def unindex_doc(docid):
+ """Remove an object with id 'docid' from the index."""
+
+ def getId():
+ """Return the id of the filter itself."""
+
+ def setExpression(expr):
+ """Set the filter expression, e.g. 'context.meta_type=='...'"""
+
+ def getExpression():
+ """Return the filter expression."""
+
+ def getIds():
+ """Return an IISet of docids."""
+
+
+class INBest(Interface):
+ """Interface for an N-Best chooser."""
+
+ def add(item, score):
+ """Record that item 'item' has score 'score'. No return value.
+
+ The N best-scoring items are remembered, where N was passed to
+ the constructor. 'item' can be anything. 'score' should be
+ a number, and larger numbers are considered better.
+ """
+
+ def addmany(sequence):
+ """Like "for item, score in sequence: self.add(item, score)".
+
+ This is simply faster than calling add() len(sequence) times.
+ """
+
+ def getbest():
+ """Return the (at most) N best-scoring items as a sequence.
+
+ The return value is a sequence of 2-tuples, (item, score), with
+ the largest score first. If .add() has been called fewer than
+ N times, this sequence will contain fewer than N pairs.
+ """
+
+ def pop_smallest():
+ """Return and remove the (item, score) pair with lowest score.
+
+ If len(self) is 0, raise IndexError.
+
+ To be clearer, this is the lowest score among the N best-scoring
+ seen so far. This is most useful if the capacity of the NBest
+ object is never exceeded, in which case pop_smallest() allows
+ using the object as an ordinary smallest-in-first-out priority
+ queue.
+ """
+
+ def __len__():
+ """Return the number of (item, score) pairs currently known.
+
+ This is N (the value passed to the constructor), unless .add()
+ has been called fewer than N times.
+ """
+
+ def capacity():
+ """Return the maximum number of (item, score) pairs.
+
+ This is N (the value passed to the constructor).
+ """
Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py (from rev 28575, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py 2004-12-06 19:11:35 UTC (rev 28575)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/nbest.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,79 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""NBest
+
+An NBest object remembers the N best-scoring items ever passed to its
+.add(item, score) method. If .add() is called M times, the worst-case
+number of comparisons performed overall is M * log2(N).
+
+$Id$
+"""
+
+from bisect import bisect_left as bisect
+
+from zope.index.interfaces import INBest
+from zope.interface import implements
+
+class NBest(object):
+ implements(INBest)
+
+ def __init__(self, N):
+ "Build an NBest object to remember the N best-scoring objects."
+
+ if N < 1:
+ raise ValueError("NBest() argument must be at least 1")
+ self._capacity = N
+
+ # This does a very simple thing with sorted lists. For large
+ # N, a min-heap can be unboundedly better in terms of data
+ # movement time.
+ self._scores = []
+ self._items = []
+
+ def __len__(self):
+ return len(self._scores)
+
+ def capacity(self):
+ return self._capacity
+
+ def add(self, item, score):
+ self.addmany([(item, score)])
+
+ def addmany(self, sequence):
+ scores, items, capacity = self._scores, self._items, self._capacity
+ n = len(scores)
+ for item, score in sequence:
+ # When we're in steady-state, the usual case is that we're filled
+ # to capacity, and that an incoming item is worse than any of
+ # the best-seen so far.
+ if n >= capacity and score <= scores[0]:
+ continue
+ i = bisect(scores, score)
+ scores.insert(i, score)
+ items.insert(i, item)
+ if n == capacity:
+ del items[0], scores[0]
+ else:
+ n += 1
+ assert n == len(scores)
+
+ def getbest(self):
+ result = zip(self._items, self._scores)
+ result.reverse()
+ return result
+
+ def pop_smallest(self):
+ if self._scores:
+ return self._items.pop(0), self._scores.pop(0)
+ raise IndexError("pop_smallest() called on empty NBest object")
Copied: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py (from rev 28575, Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py)
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py 2004-12-06 19:11:35 UTC (rev 28575)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/tests.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,100 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""N-Best index tests
+
+$Id$
+"""
+from unittest import TestCase, main, makeSuite
+
+from zope.index.nbest import NBest
+
+class NBestTest(TestCase):
+
+ def testConstructor(self):
+ self.assertRaises(ValueError, NBest, 0)
+ self.assertRaises(ValueError, NBest, -1)
+
+ for n in range(1, 11):
+ nb = NBest(n)
+ self.assertEqual(len(nb), 0)
+ self.assertEqual(nb.capacity(), n)
+
+ def testOne(self):
+ nb = NBest(1)
+ nb.add('a', 0)
+ self.assertEqual(nb.getbest(), [('a', 0)])
+
+ nb.add('b', 1)
+ self.assertEqual(len(nb), 1)
+ self.assertEqual(nb.capacity(), 1)
+ self.assertEqual(nb.getbest(), [('b', 1)])
+
+ nb.add('c', -1)
+ self.assertEqual(len(nb), 1)
+ self.assertEqual(nb.capacity(), 1)
+ self.assertEqual(nb.getbest(), [('b', 1)])
+
+ nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
+ self.assertEqual(len(nb), 1)
+ self.assertEqual(nb.capacity(), 1)
+ self.assertEqual(nb.getbest(), [('f', 5)])
+
+ def testMany(self):
+ import random
+ inputs = [(-i, i) for i in range(50)]
+
+ reversed_inputs = inputs[:]
+ reversed_inputs.reverse()
+
+ # Test the N-best for a variety of n (1, 6, 11, ... 50).
+ for n in range(1, len(inputs)+1, 5):
+ expected = inputs[-n:]
+ expected.reverse()
+
+ random_inputs = inputs[:]
+ random.shuffle(random_inputs)
+
+ for source in inputs, reversed_inputs, random_inputs:
+ # Try feeding them one at a time.
+ nb = NBest(n)
+ for item, score in source:
+ nb.add(item, score)
+ self.assertEqual(len(nb), n)
+ self.assertEqual(nb.capacity(), n)
+ self.assertEqual(nb.getbest(), expected)
+
+ # And again in one gulp.
+ nb = NBest(n)
+ nb.addmany(source)
+ self.assertEqual(len(nb), n)
+ self.assertEqual(nb.capacity(), n)
+ self.assertEqual(nb.getbest(), expected)
+
+ for i in range(1, n+1):
+ self.assertEqual(nb.pop_smallest(), expected[-i])
+ self.assertRaises(IndexError, nb.pop_smallest)
+
+ def testAllSameScore(self):
+ inputs = [(i, 0) for i in range(10)]
+ for n in range(1, 12):
+ nb = NBest(n)
+ nb.addmany(inputs)
+ outputs = nb.getbest()
+ self.assertEqual(outputs, inputs[:len(outputs)])
+
+def test_suite():
+ return makeSuite(NBestTest)
+
+if __name__=='__main__':
+ main(defaultTest='test_suite')
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/htmlsplitter.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -19,10 +19,8 @@
from zope.interface import implements
-from zope.index.interfaces.splitter import ISplitter
-from zope.index.text.pipelinefactory import element_factory
+from zope.index.text.interfaces import ISplitter
-
class HTMLWordSplitter(object):
implements(ISplitter)
@@ -45,10 +43,6 @@
text = re.sub(pat, " ", text)
return re.findall(wordpat, text)
-element_factory.registerFactory('Word Splitter',
- 'HTML aware splitter',
- HTMLWordSplitter)
-
if __name__ == "__main__":
import sys
splitter = HTMLWordSplitter()
Added: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -0,0 +1,168 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+"""Text-indexing interfaces
+
+$Id$
+"""
+from zope.interface import Interface
+
+class ILexicon(Interface):
+ """Object responsible for converting text to word identifiers."""
+
+ def termToWordIds(text):
+ """Return a sequence of ids of the words parsed from the text.
+
+ The input text may be either a string or a list of strings.
+
+ Parse the text as if it consists of search terms, and skip words
+ that aren't in the lexicon.
+ """
+
+ def sourceToWordIds(text):
+ """Return a sequence of ids of the words parsed from the text.
+
+ The input text may be either a string or a list of strings.
+
+ Parse the text as if it comes from a source document, and
+ create new word ids for words that aren't (yet) in the
+ lexicon.
+ """
+
+ def globToWordIds(pattern):
+ """Return a sequence of ids of words matching the pattern.
+
+ The argument should be a single word using globbing syntax,
+ e.g. 'foo*' meaning anything starting with 'foo'.
+
+ Return the wids for all words in the lexicon that match the
+ pattern.
+ """
+
+ def wordCount():
+ """Return the number of unique terms in the lexicon."""
+
+ def get_word(wid):
+ """Return the word for the given word id.
+
+ Raise KeyError if the word id is not in the lexicon.
+ """
+
+ def get_wid(word):
+ """Return the word id for the given word.
+
+ Return 0 if the word is not in the lexicon.
+ """
+
+ def parseTerms(text):
+ """Pass the text through the pipeline.
+
+ Return a list of words, normalized by the pipeline
+ (e.g. stopwords removed, case normalized etc.).
+ """
+
+ def isGlob(word):
+ """Return true if the word is a globbing pattern.
+
+ The word should be one of the words returned by parseTerms().
+ """
+
+class IQueryParser(Interface):
+ """Interface for Query Parsers."""
+
+ def parseQuery(query):
+ """Parse a query string.
+
+ Return a parse tree (which implements IQueryParseTree).
+
+ Some of the query terms may be ignored because they are
+ stopwords; use getIgnored() to find out which terms were
+ ignored. But if the entire query consists only of stop words,
+ or of stopwords and one or more negated terms, an exception is
+ raised.
+
+ May raise ParseTree.ParseError.
+ """
+
+ def getIgnored():
+ """Return the list of ignored terms.
+
+ Return the list of terms that were ignored by the most recent
+ call to parseQuery() because they were stopwords.
+
+ If parseQuery() was never called this returns None.
+ """
+
+ def parseQueryEx(query):
+ """Parse a query string.
+
+ Return a tuple (tree, ignored) where 'tree' is the parse tree
+ as returned by parseQuery(), and 'ignored' is a list of
+ ignored terms as returned by getIgnored().
+
+ May raise ParseTree.ParseError.
+ """
+
+class IQueryParseTree(Interface):
+ """Interface for parse trees returned by parseQuery()."""
+
+ def nodeType():
+ """Return the node type.
+
+ This is one of 'AND', 'OR', 'NOT', 'ATOM', 'PHRASE' or 'GLOB'.
+ """
+
+ def getValue():
+ """Return a node-type specific value.
+
+ For node type: Return:
+ 'AND' a list of parse trees
+ 'OR' a list of parse trees
+ 'NOT' a parse tree
+ 'ATOM' a string (representing a single search term)
+ 'PHRASE' a string (representing a search phrase)
+ 'GLOB' a string (representing a pattern, e.g. "foo*")
+ """
+
+ def terms():
+ """Return a list of all terms in this node, excluding NOT subtrees."""
+
+ def executeQuery(index):
+ """Execute the query represented by this node against the index.
+
+ The index argument must implement the IIndex interface.
+
+ Return an IIBucket or IIBTree mapping document ids to scores
+ (higher scores mean better results).
+
+ May raise ParseTree.QueryError.
+ """
+
+class ISearchableText(Interface):
+ """Interface that text-indexable objects should implement."""
+
+ def getSearchableText():
+ """Return a sequence of unicode strings to be indexed.
+
+ Each unicode string in the returned sequence will be run
+ through the splitter pipeline; the combined stream of words
+ coming out of the pipeline will be indexed.
+
+ returning None indicates the object should not be indexed
+ """
+
+class ISplitter(Interface):
+ """A splitter."""
+
+ def process(text):
+ """Run the splitter over the input text, returning a list of terms."""
Property changes on: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/interfaces.py
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:eol-style
+ native
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/lexicon.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -24,10 +24,9 @@
from persistent import Persistent
-from zope.index.interfaces.lexicon import ILexicon
+from zope.index.text.interfaces import ILexicon
from zope.index.text.stopdict import get_stopdict
from zope.index.text.parsetree import QueryError
-from zope.index.text.pipelinefactory import element_factory
class Lexicon(Persistent):
@@ -175,23 +174,11 @@
result += self.rxGlob.findall(s)
return result
-element_factory.registerFactory('Word Splitter',
- 'Whitespace splitter',
- Splitter)
-
class CaseNormalizer(object):
def process(self, lst):
return [w.lower() for w in lst]
-element_factory.registerFactory('Case Normalizer',
- 'Case Normalizer',
- CaseNormalizer)
-
-element_factory.registerFactory('Stop Words',
- ' Don\'t remove stop words',
- None)
-
class StopWordRemover(object):
dict = get_stopdict().copy()
@@ -206,16 +193,8 @@
def process(self, lst):
return self._process(self.dict, lst)
-element_factory.registerFactory('Stop Words',
- 'Remove listed stop words only',
- StopWordRemover)
-
class StopWordAndSingleCharRemover(StopWordRemover):
dict = get_stopdict().copy()
for c in range(255):
dict[chr(c)] = None
-
-element_factory.registerFactory('Stop Words',
- 'Remove listed and single char words',
- StopWordAndSingleCharRemover)
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/nbest.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,79 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""NBest
-
-An NBest object remembers the N best-scoring items ever passed to its
-.add(item, score) method. If .add() is called M times, the worst-case
-number of comparisons performed overall is M * log2(N).
-
-$Id$
-"""
-
-from bisect import bisect_left as bisect
-
-from zope.index.interfaces.nbest import INBest
-from zope.interface import implements
-
-class NBest(object):
- implements(INBest)
-
- def __init__(self, N):
- "Build an NBest object to remember the N best-scoring objects."
-
- if N < 1:
- raise ValueError("NBest() argument must be at least 1")
- self._capacity = N
-
- # This does a very simple thing with sorted lists. For large
- # N, a min-heap can be unboundedly better in terms of data
- # movement time.
- self._scores = []
- self._items = []
-
- def __len__(self):
- return len(self._scores)
-
- def capacity(self):
- return self._capacity
-
- def add(self, item, score):
- self.addmany([(item, score)])
-
- def addmany(self, sequence):
- scores, items, capacity = self._scores, self._items, self._capacity
- n = len(scores)
- for item, score in sequence:
- # When we're in steady-state, the usual case is that we're filled
- # to capacity, and that an incoming item is worse than any of
- # the best-seen so far.
- if n >= capacity and score <= scores[0]:
- continue
- i = bisect(scores, score)
- scores.insert(i, score)
- items.insert(i, item)
- if n == capacity:
- del items[0], scores[0]
- else:
- n += 1
- assert n == len(scores)
-
- def getbest(self):
- result = zip(self._items, self._scores)
- result.reverse()
- return result
-
- def pop_smallest(self):
- if self._scores:
- return self._items.pop(0), self._scores.pop(0)
- raise IndexError("pop_smallest() called on empty NBest object")
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/parsetree.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -17,8 +17,9 @@
"""
from BTrees.IIBTree import difference
-from zope.index.interfaces.queryparsetree import IQueryParseTree
-from zope.index.text.setops import mass_weightedIntersection, mass_weightedUnion
+from zope.index.text.interfaces import IQueryParseTree
+from zope.index.text.setops import mass_weightedIntersection
+from zope.index.text.setops import mass_weightedUnion
from zope.interface import implements
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/pipelinefactory.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,55 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Element Factory
-
-$Id$
-"""
-from zope.index.interfaces.pipelineelementfactory import IPipelineElementFactory
-from zope.interface import implements
-
-class PipelineElementFactory(object):
-
- implements(IPipelineElementFactory)
-
- def __init__(self):
- self._groups = {}
-
- def registerFactory(self, group, name, factory):
- if self._groups.has_key(group) and \
- self._groups[group].has_key(name):
- raise ValueError('ZCTextIndex lexicon element "%s" '
- 'already registered in group "%s"'
- % (name, group))
-
- elements = self._groups.get(group)
- if elements is None:
- elements = self._groups[group] = {}
- elements[name] = factory
-
- def getFactoryGroups(self):
- groups = self._groups.keys()
- groups.sort()
- return groups
-
- def getFactoryNames(self, group):
- names = self._groups[group].keys()
- names.sort()
- return names
-
- def instantiate(self, group, name):
- factory = self._groups[group][name]
- if factory is not None:
- return factory()
-
-element_factory = PipelineElementFactory()
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/queryparser.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -59,7 +59,7 @@
import re
from zope.interface import implements
-from zope.index.interfaces.queryparser import IQueryParser
+from zope.index.text.interfaces import IQueryParser
from zope.index.text import parsetree
# Create unique symbols for token types.
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/setops.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -15,10 +15,9 @@
$Id$
"""
-from BTrees.IIBTree import \
- IIBucket, weightedIntersection, weightedUnion
+from BTrees.IIBTree import IIBucket, weightedIntersection, weightedUnion
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
def mass_weightedIntersection(L):
"A list of (mapping, weight) pairs -> their weightedIntersection IIBucket."
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/queryhtml.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -43,7 +43,7 @@
return "http://www.python.org" + p[i:]
from Products.PluginIndexes.TextIndex.TextIndex import And, Or
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
def main(rt):
index = rt["index"]
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_nbest.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,100 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE.
-#
-##############################################################################
-"""N-Best index tests
-
-$Id$
-"""
-from unittest import TestCase, main, makeSuite
-
-from zope.index.text.nbest import NBest
-
-class NBestTest(TestCase):
-
- def testConstructor(self):
- self.assertRaises(ValueError, NBest, 0)
- self.assertRaises(ValueError, NBest, -1)
-
- for n in range(1, 11):
- nb = NBest(n)
- self.assertEqual(len(nb), 0)
- self.assertEqual(nb.capacity(), n)
-
- def testOne(self):
- nb = NBest(1)
- nb.add('a', 0)
- self.assertEqual(nb.getbest(), [('a', 0)])
-
- nb.add('b', 1)
- self.assertEqual(len(nb), 1)
- self.assertEqual(nb.capacity(), 1)
- self.assertEqual(nb.getbest(), [('b', 1)])
-
- nb.add('c', -1)
- self.assertEqual(len(nb), 1)
- self.assertEqual(nb.capacity(), 1)
- self.assertEqual(nb.getbest(), [('b', 1)])
-
- nb.addmany([('d', 3), ('e', -6), ('f', 5), ('g', 4)])
- self.assertEqual(len(nb), 1)
- self.assertEqual(nb.capacity(), 1)
- self.assertEqual(nb.getbest(), [('f', 5)])
-
- def testMany(self):
- import random
- inputs = [(-i, i) for i in range(50)]
-
- reversed_inputs = inputs[:]
- reversed_inputs.reverse()
-
- # Test the N-best for a variety of n (1, 6, 11, ... 50).
- for n in range(1, len(inputs)+1, 5):
- expected = inputs[-n:]
- expected.reverse()
-
- random_inputs = inputs[:]
- random.shuffle(random_inputs)
-
- for source in inputs, reversed_inputs, random_inputs:
- # Try feeding them one at a time.
- nb = NBest(n)
- for item, score in source:
- nb.add(item, score)
- self.assertEqual(len(nb), n)
- self.assertEqual(nb.capacity(), n)
- self.assertEqual(nb.getbest(), expected)
-
- # And again in one gulp.
- nb = NBest(n)
- nb.addmany(source)
- self.assertEqual(len(nb), n)
- self.assertEqual(nb.capacity(), n)
- self.assertEqual(nb.getbest(), expected)
-
- for i in range(1, n+1):
- self.assertEqual(nb.pop_smallest(), expected[-i])
- self.assertRaises(IndexError, nb.pop_smallest)
-
- def testAllSameScore(self):
- inputs = [(i, 0) for i in range(10)]
- for n in range(1, 12):
- nb = NBest(n)
- nb.addmany(inputs)
- outputs = nb.getbest()
- self.assertEqual(outputs, inputs[:len(outputs)])
-
-def test_suite():
- return makeSuite(NBestTest)
-
-if __name__=='__main__':
- main(defaultTest='test_suite')
Deleted: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_pipelinefactory.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -1,53 +0,0 @@
-##############################################################################
-#
-# Copyright (c) 2002 Zope Corporation and Contributors.
-# All Rights Reserved.
-#
-# This software is subject to the provisions of the Zope Public License,
-# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
-# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
-# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
-# FOR A PARTICULAR PURPOSE
-#
-##############################################################################
-"""Pipeline Factory tests
-
-$Id$
-"""
-from unittest import TestCase, main, makeSuite
-from zope.index.interfaces.pipelineelement import IPipelineElement
-from zope.index.text.pipelinefactory import PipelineElementFactory
-from zope.interface import implements
-
-class NullPipelineElement(object):
- implements(IPipelineElement)
-
- def process(source):
- pass
-
-class PipelineFactoryTest(TestCase):
-
- def setUp(self):
- self.huey = NullPipelineElement()
- self.dooey = NullPipelineElement()
- self.louie = NullPipelineElement()
- self.daffy = NullPipelineElement()
-
- def testPipeline(self):
- pf = PipelineElementFactory()
- pf.registerFactory('donald', 'huey', self.huey)
- pf.registerFactory('donald', 'dooey', self.dooey)
- pf.registerFactory('donald', 'louie', self.louie)
- pf.registerFactory('looney', 'daffy', self.daffy)
- self.assertRaises(ValueError, pf.registerFactory,'donald', 'huey',
- self.huey)
- self.assertEqual(pf.getFactoryGroups(), ['donald', 'looney'])
- self.assertEqual(pf.getFactoryNames('donald'),
- ['dooey', 'huey', 'louie'])
-
-def test_suite():
- return makeSuite(PipelineFactoryTest)
-
-if __name__=='__main__':
- main(defaultTest='test_suite')
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/tests/test_queryparser.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -19,8 +19,8 @@
from zope.interface.verify import verifyClass
-from zope.index.interfaces.queryparser import IQueryParser
-from zope.index.interfaces.queryparsetree import IQueryParseTree
+from zope.index.text.interfaces import IQueryParser
+from zope.index.text.interfaces import IQueryParseTree
from zope.index.text.queryparser import QueryParser
from zope.index.text.parsetree import ParseError, ParseTreeNode
Modified: Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py
===================================================================
--- Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py 2004-12-06 21:52:05 UTC (rev 28576)
+++ Zope3/branches/jim-index-restructure-2004-12/src/zope/index/text/textindexwrapper.py 2004-12-07 18:15:57 UTC (rev 28577)
@@ -25,10 +25,9 @@
from zope.index.text.lexicon import Lexicon
from zope.index.text.lexicon import Splitter, CaseNormalizer, StopWordRemover
from zope.index.text.queryparser import QueryParser
-from zope.index.text.nbest import NBest
+from zope.index.nbest import NBest
-from zope.index.interfaces import \
- IInjection, IQuerying, IStatistics
+from zope.index.interfaces import IInjection, IQuerying, IStatistics
class TextIndexWrapper(Persistent):
More information about the Zope3-Checkins
mailing list