[Zope-Checkins] CVS: Zope/lib/python/SearchIndex - GlobbingLexicon.py:1.13 Index.py:1.30 Lexicon.py:1.20 PluggableIndex.py:1.4 ResultList.py:1.6 TextIndex.py:1.31 UnIndex.py:1.31 UnKeywordIndex.py:1.19 UnTextIndex.py:1.53 __init__.py:1.10 randid.py:1.4
Martijn Pieters
mj@zope.com
Wed, 14 Aug 2002 17:46:55 -0400
Update of /cvs-repository/Zope/lib/python/SearchIndex
In directory cvs.zope.org:/tmp/cvs-serv16076
Modified Files:
GlobbingLexicon.py Index.py Lexicon.py PluggableIndex.py
ResultList.py TextIndex.py UnIndex.py UnKeywordIndex.py
UnTextIndex.py __init__.py randid.py
Log Message:
Clean up indentation and trailing whitespace.
=== Zope/lib/python/SearchIndex/GlobbingLexicon.py 1.12 => 1.13 ===
--- Zope/lib/python/SearchIndex/GlobbingLexicon.py:1.12 Wed Nov 28 11:09:08 2001
+++ Zope/lib/python/SearchIndex/GlobbingLexicon.py Wed Aug 14 17:46:23 2002
@@ -1,5 +1,5 @@
##############################################################################
-#
+#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
@@ -8,7 +8,7 @@
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
#############################################################################
from Lexicon import Lexicon
@@ -82,7 +82,7 @@
return digrams
-
+
def getWordId(self, word):
"""Provided 'word', return the matching integer word id."""
@@ -110,7 +110,7 @@
try: insert=inverse.insert
except AttributeError:
# we have an "old" BTree object
- if inverse:
+ if inverse:
wid=inverse.keys()[-1]+1
else:
self._inverseLex=IOBTree()
@@ -133,7 +133,7 @@
return wid
-
+
def get(self, pattern):
""" Query the lexicon for words matching a pattern."""
wc_set = [self.multi_wc, self.single_wc]
@@ -161,7 +161,7 @@
if result is None:
return ()
return (result, )
-
+
## now get all of the intsets that contain the result digrams
result = None
for digram in digrams:
@@ -185,7 +185,7 @@
hits.insert(x)
return hits
-
+
def __getitem__(self, word):
""" """
return self.get(word)
@@ -235,12 +235,11 @@
transTable = string.maketrans("", "")
result = string.translate(pat, transTable,
r'()&|!@#$%^{}\<>.')
-
+
# First, deal with multi-character globbing
result = string.replace(result, '*', '.*')
# Next, we need to deal with single-character globbing
result = string.replace(result, '?', '.')
- return "%s$" % result
-
+ return "%s$" % result
=== Zope/lib/python/SearchIndex/Index.py 1.29 => 1.30 ===
--- Zope/lib/python/SearchIndex/Index.py:1.29 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/Index.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
"""Simple column indices"""
@@ -54,8 +54,8 @@
either an attribute name or a record key.
"""
- ######################################################################
- # For b/w compatability, have to allow __init__ calls with zero args
+ ######################################################################
+ # For b/w compatability, have to allow __init__ calls with zero args
if not data==schema==id==ignore_ex==call_methods==None:
self._data = data
@@ -64,7 +64,7 @@
self.ignore_ex=ignore_ex
self.call_methods=call_methods
self._index = OOBTree()
-
+
self._reindex()
else:
pass
@@ -95,7 +95,7 @@
if not withLengths: return tuple(
filter(nonEmpty,self._index.keys())
)
- else:
+ else:
rl=[]
for i in self._index.keys():
if not nonEmpty(i): continue
@@ -112,7 +112,7 @@
index=self._index
get=index.get
-
+
if not start: index.clear()
id = self.id
@@ -180,7 +180,7 @@
if set is not None: set.remove(i)
- def _apply_index(self, request, cid=''):
+ def _apply_index(self, request, cid=''):
"""Apply the index to query parameters given in the argument,
request
@@ -244,20 +244,3 @@
else: return None
return r, (id,)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
=== Zope/lib/python/SearchIndex/Lexicon.py 1.19 => 1.20 ===
--- Zope/lib/python/SearchIndex/Lexicon.py:1.19 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/Lexicon.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
__doc__=""" Module breaks out Zope specific methods and behavior. In
@@ -50,7 +50,7 @@
def clear(self):
self._lexicon = OIBTree()
self._inverseLex = IOBTree()
-
+
def _convertBTrees(self, threshold=200):
if (type(self._lexicon) is OIBTree and
type(getattr(self, '_inverseLex', None)) is IOBTree):
@@ -73,7 +73,7 @@
self._inverseLex._p_jar=self._p_jar
convert(inverseLex, self._inverseLex, threshold)
-
+
def set_stop_syn(self, stop_syn):
""" pass in a mapping of stopwords and synonyms. Format is:
@@ -84,22 +84,22 @@
"""
self.stop_syn = stop_syn
-
+
def getWordId(self, word):
""" return the word id of 'word' """
wid=self._lexicon.get(word, None)
- if wid is None:
+ if wid is None:
wid=self.assignWordId(word)
return wid
-
+
set = getWordId
def getWord(self, wid):
""" post-2.3.1b2 method, will not work with unconverted lexicons """
return self._inverseLex.get(wid, None)
-
+
def assignWordId(self, word):
"""Assigns a new word id to the provided word and returns it."""
# First make sure it's not already in there
@@ -148,7 +148,7 @@
def query_hook(self, q):
""" we don't want to modify the query cuz we're dumb """
return q
-
+
@@ -200,7 +200,3 @@
)
stop_word_dict={}
for word in stop_words: stop_word_dict[word]=None
-
-
-
-
=== Zope/lib/python/SearchIndex/PluggableIndex.py 1.3 => 1.4 ===
--- Zope/lib/python/SearchIndex/PluggableIndex.py:1.3 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/PluggableIndex.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
"""Pluggable Index Base Class """
=== Zope/lib/python/SearchIndex/ResultList.py 1.5 => 1.6 ===
--- Zope/lib/python/SearchIndex/ResultList.py:1.5 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/ResultList.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
from BTrees.IIBTree import IIBucket
@@ -16,13 +16,13 @@
from BTrees.OOBTree import OOSet, union
class ResultList:
-
+
def __init__(self, d, words, index, TupleType=type(())):
self._index = index
if type(words) is not OOSet: words=OOSet(words)
self._words = words
-
+
if (type(d) is TupleType):
d = IIBucket((d,))
elif type(d) is not IIBucket:
@@ -43,7 +43,7 @@
def has_key(self, key): return self._dict.has_key(key)
- def items(self): return self._dict.items()
+ def items(self): return self._dict.items()
def __and__(self, x):
return self.__class__(
@@ -58,7 +58,7 @@
self._words,
self._index,
)
-
+
def __or__(self, x):
return self.__class__(
weightedUnion(self._dict, x._dict)[1],
@@ -89,7 +89,6 @@
if d==lp: score = min(score,xdict[id]) # synonyms
else: score = (score+xdict[id])/d
result[id] = score
-
+
return self.__class__(
result, union(self._words, x._words), self._index)
-
=== Zope/lib/python/SearchIndex/TextIndex.py 1.30 => 1.31 ===
--- Zope/lib/python/SearchIndex/TextIndex.py:1.30 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/TextIndex.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
"""Text Index
@@ -71,7 +71,7 @@
tree introduces overhead.
Trie structure currently introduces an excessive number of nodes.
- Typically, a node per two or three words. Trie has potential to
+ Typically, a node per two or three words. Trie has potential to
reduce storage because key storage is shared between words.
Maybe an alternative to a Trie is some sort of nested BTree. Or
@@ -86,9 +86,9 @@
Then:
- After some point, tree objects no longer change
-
+
If this is case, then it doesn\'t make sense to optimize tree for
- change.
+ change.
Additional notes
@@ -112,7 +112,7 @@
textSearchResult -- id -> (score, positions)
id -- integer, say 4-byte.
-
+
positions -- sequence of integers.
score -- numeric measure of relevence, f(numberOfWords, positions)
@@ -242,7 +242,7 @@
pass
- def unindex_item(self, i, obj=None):
+ def unindex_item(self, i, obj=None):
return self.index_item(i, obj, 1)
@@ -255,12 +255,12 @@
tupleType=type(()),
dictType=type({}),
):
- src = Splitter(document_text, self._syn)
+ src = Splitter(document_text, self._syn)
d = {}
old = d.has_key
last = None
-
+
for s in src:
if s[0] == '\"': last=self.subindex(s[1:-1], d, old, last)
else:
@@ -305,7 +305,7 @@
def _subindex(self, isrc, d, old, last):
- src = Splitter(isrc, self._syn)
+ src = Splitter(isrc, self._syn)
for s in src:
if s[0] == '\"': last=self.subindex(s[1:-1],d,old,last)
@@ -328,7 +328,7 @@
r = self._index.get(word,None)
if r is None: r = {}
return ResultList(r, (word,), self)
-
+
r = None
for word in src:
rr = self[word]
@@ -338,7 +338,7 @@
return r
- def _apply_index(self, request, cid='', ListType=[]):
+ def _apply_index(self, request, cid='', ListType=[]):
""" Apply the index to query parameters given in the argument,
request
@@ -350,7 +350,7 @@
Otherwise two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
- all data fields used.
+ all data fields used.
"""
id = self.id
@@ -376,7 +376,7 @@
if r is None: r = rr
else:
# Note that we *and*/*narrow* multiple search terms.
- r = r.intersection(rr)
+ r = r.intersection(rr)
if r is not None: return r, (id,)
return IISet(), (id,)
@@ -433,7 +433,7 @@
if ((i % 2) != 0):
# This word should be an operator; if it is not, splice in
# the default operator.
-
+
if type(q[i]) is not ListType and isop(q[i]):
q[i] = operator_dict[q[i]]
else: q[i : i] = [ default_operator ]
@@ -450,7 +450,7 @@
while 1:
index = parens_re(s, index)
if index is None : break
-
+
if s[index] == '(':
paren_count = paren_count + 1
if open_index == 0 : open_index = index + 1
@@ -465,36 +465,36 @@
if paren_count == 0: # No parentheses Found
return None
else:
- raise QueryError, "Mismatched parentheses"
+ raise QueryError, "Mismatched parentheses"
def quotes(s, ws = (string.whitespace,)):
- # split up quoted regions
- splitted = re.split( '[%s]*\"[%s]*' % (ws * 2),s)
- split=string.split
-
- if (len(splitted) > 1):
- if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
-
- for i in range(1,len(splitted),2):
- # split the quoted region into words
- splitted[i] = filter(None, split(splitted[i]))
-
- # put the Proxmity operator in between quoted words
- for j in range(1, len(splitted[i])):
- splitted[i][j : j] = [ Near ]
-
- for i in range(len(splitted)-1,-1,-2):
- # split the non-quoted region into words
- splitted[i:i+1] = filter(None, split(splitted[i]))
-
- splitted = filter(None, splitted)
- else:
- # No quotes, so just split the string into words
- splitted = filter(None, split(s))
+ # split up quoted regions
+ splitted = re.split( '[%s]*\"[%s]*' % (ws * 2),s)
+ split=string.split
+
+ if (len(splitted) > 1):
+ if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
+
+ for i in range(1,len(splitted),2):
+ # split the quoted region into words
+ splitted[i] = filter(None, split(splitted[i]))
+
+ # put the Proxmity operator in between quoted words
+ for j in range(1, len(splitted[i])):
+ splitted[i][j : j] = [ Near ]
+
+ for i in range(len(splitted)-1,-1,-2):
+ # split the non-quoted region into words
+ splitted[i:i+1] = filter(None, split(splitted[i]))
+
+ splitted = filter(None, splitted)
+ else:
+ # No quotes, so just split the string into words
+ splitted = filter(None, split(s))
- return splitted
+ return splitted
def get_operands(q, i, index, ListType=type([]), StringType=type('')):
'''Evaluate and return the left and right operands for an operator'''
@@ -523,7 +523,7 @@
return evaluate(q[0], index)
return index[q[0]]
-
+
i = 0
while (i < len(q)):
if q[i] is AndNot:
=== Zope/lib/python/SearchIndex/UnIndex.py 1.30 => 1.31 ===
--- Zope/lib/python/SearchIndex/UnIndex.py:1.30 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/UnIndex.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
"""Simple column indices"""
@@ -42,7 +42,7 @@
UnIndexes are indexes that contain two index components, the
forward index (like plain index objects) and an inverted
- index. The inverted index is so that objects can be unindexed
+ index. The inverted index is so that objects can be unindexed
even when the old value of the object is not known.
e.g.
@@ -63,7 +63,7 @@
to ignore exceptions raised while indexing instead of
propagating them.
- 'call_methods' -- should be set to true if you want the index
+ 'call_methods' -- should be set to true if you want the index
to call the attribute 'id' (note: 'id' should be callable!)
You will also need to pass in an object in the index and
uninded methods for this to work.
@@ -90,7 +90,7 @@
_index=self._index
self._index=OOBTree()
-
+
def convertSet(s,
IITreeSet=IITreeSet, IntType=type(0),
type=type, len=len,
@@ -103,7 +103,7 @@
except: pass # This is just an optimization.
return IITreeSet(s)
-
+
convert(_index, self._index, threshold, convertSet)
_unindex=self._unindex
@@ -152,8 +152,8 @@
return self._unindex.get(documentId)
else:
return self._unindex.get(documentId, default)
-
-
+
+
def removeForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and remove any reference to documentId
in its entry in the index."""
@@ -170,7 +170,7 @@
# index row is an int
del self._index[entry]
try: self.__len__.change(-1)
- except AttributeError: pass # pre-BTrees-module instance
+ except AttributeError: pass # pre-BTrees-module instance
except:
LOG(self.__class__.__name__, ERROR,
('unindex_object could not remove '
@@ -184,7 +184,7 @@
'from index %s but couldn\'t. This '
'should not happen.' % (repr(entry), str(self.id))))
-
+
def insertForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and put it in the correct place
in the forward index.
@@ -192,7 +192,7 @@
This will also deal with creating the entire row if necessary."""
global _marker
indexRow = self._index.get(entry, _marker)
-
+
# Make sure there's actually a row there already. If not, create
# an IntSet and stuff it in first.
if indexRow is _marker:
@@ -221,7 +221,7 @@
datum = datum()
except AttributeError:
datum = _marker
-
+
# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum = self._unindex.get(documentId, _marker)
@@ -247,14 +247,14 @@
return None
self.removeForwardIndexEntry(unindexRecord, documentId)
-
+
try:
del self._unindex[documentId]
except:
LOG('UnIndex', ERROR, 'Attempt to unindex nonexistent document'
' with id %s' % documentId)
- def _apply_index(self, request, cid='', type=type, None=None):
+ def _apply_index(self, request, cid='', type=type, None=None):
"""Apply the index to query parameters given in the request arg.
The request argument should be a mapping object.
@@ -366,7 +366,7 @@
if not withLengths:
return tuple(self._index.keys())
- else:
+ else:
rl=[]
for i in self._index.keys():
set = self._index[i]
@@ -387,4 +387,3 @@
v = IISet((v,))
items.append((k, v))
return items
-
=== Zope/lib/python/SearchIndex/UnKeywordIndex.py 1.18 => 1.19 ===
--- Zope/lib/python/SearchIndex/UnKeywordIndex.py:1.18 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/UnKeywordIndex.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
from UnIndex import UnIndex
@@ -19,9 +19,9 @@
class UnKeywordIndex(UnIndex):
meta_type = 'Keyword Index'
-
+
"""Like an UnIndex only it indexes sequences of items
-
+
Searches match any keyword.
This should have an _apply_index that returns a relevance score
=== Zope/lib/python/SearchIndex/UnTextIndex.py 1.52 => 1.53 ===
--- Zope/lib/python/SearchIndex/UnTextIndex.py:1.52 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/UnTextIndex.py Wed Aug 14 17:46:23 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
"""Text Index
@@ -84,13 +84,13 @@
'lexicon' is the lexicon object to specify, if None, the
index will use a private lexicon."""
-
+
self.id = id
self.ignore_ex = ignore_ex
self.call_methods = call_methods
self.clear()
-
+
if lexicon is None:
## if no lexicon is provided, create a default one
self._lexicon = Lexicon()
@@ -102,7 +102,7 @@
def getLexicon(self, vocab_id):
"""Return the Lexicon in use.
-
+
Bit of a hack, indexes have been made acquirers so that they
can acquire a vocabulary object from the object system in
Zope. I don't think indexes were ever intended to participate
@@ -117,7 +117,7 @@
def __nonzero__(self):
return not not self._unindex
-
+
# Too expensive
#def __len__(self):
# """Return the number of objects indexed."""
@@ -132,7 +132,7 @@
def _convertBTrees(self, threshold=200):
if type(self._lexicon) is type(''):
- # Turn the name reference into a hard reference.
+ # Turn the name reference into a hard reference.
self._lexicon=self.getLexicon(self._lexicon)
if type(self._index) is IOBTree: return
@@ -148,7 +148,7 @@
if type(scores) is not TupleType and type(scores) is not IIBTree():
scores=IIBTree(scores)
return scores
-
+
convert(_index, self._index, threshold, convertScores)
@@ -182,8 +182,8 @@
else:
return tuple(map(self.getLexicon(self._lexicon).getWord,
results))
-
-
+
+
def insertForwardIndexEntry(self, entry, documentId, score=1):
"""Uses the information provided to update the indexes.
@@ -219,12 +219,12 @@
else:
if indexRow.get(documentId, -1) != score:
# score changed (or new entry)
-
+
if type(indexRow) is DictType:
indexRow[documentId] = score
if len(indexRow) > 3:
# Big enough to give it's own database record
- indexRow=IIBTree(indexRow)
+ indexRow=IIBTree(indexRow)
index[entry] = indexRow
else:
indexRow[documentId] = score
@@ -236,7 +236,7 @@
def index_object(self, documentId, obj, threshold=None):
""" Index an object:
'documentId' is the integer id of the document
-
+
'obj' is the objects to be indexed
'threshold' is the number of words to process between
@@ -253,13 +253,13 @@
source = str(source)
except (AttributeError, TypeError):
return 0
-
+
lexicon = self.getLexicon(self._lexicon)
splitter=lexicon.Splitter
wordScores = OIBTree()
last = None
-
+
# Run through the words and score them
for word in splitter(source):
if word[0] == '\"':
@@ -281,7 +281,7 @@
# Get rid of document words that are no longer indexed
self.unindex_objectWids(documentId, difference(currentWids, widScores))
-
+
# Now index the words. Note that the new xIBTrees are clever
# enough to do nothing when there isn't a change. Woo hoo.
insert=self.insertForwardIndexEntry
@@ -307,10 +307,10 @@
return last
- def unindex_object(self, i):
+ def unindex_object(self, i):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
-
+
index = self._index
unindex = self._unindex
wids = unindex.get(i, None)
@@ -318,7 +318,7 @@
self.unindex_objectWids(i, wids)
del unindex[i]
- def unindex_objectWids(self, i, wids):
+ def unindex_objectWids(self, i, wids):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
@@ -355,7 +355,7 @@
Note that this differentiates between being passed an Integer
and a String. Strings are looked up in the lexicon, whereas
Integers are assumed to be resolved word ids. """
-
+
if isinstance(word, IntType):
# We have a word ID
result = self._index.get(word, {})
@@ -365,7 +365,7 @@
if not splitSource:
return ResultList({}, (word,), self)
-
+
if len(splitSource) == 1:
splitSource = splitSource[0]
if splitSource[:1] == splitSource[-1:] == '"':
@@ -392,7 +392,7 @@
return r
- def _apply_index(self, request, cid=''):
+ def _apply_index(self, request, cid=''):
""" Apply the index to query parameters given in the argument,
request
@@ -400,11 +400,11 @@
If the request does not contain the needed parameters, then
None is returned.
-
+
Otherwise two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
- all data fields used.
+ all data fields used.
"""
if request.has_key(self.id):
keys = request[self.id]
@@ -430,9 +430,9 @@
if not keys or not string.strip(keys):
return None
keys = [keys]
-
+
r = None
-
+
for key in keys:
key = string.strip(key)
if not key:
@@ -443,7 +443,7 @@
if r is not None:
return r, (self.id,)
-
+
return (IIBucket(), (self.id,))
@@ -481,7 +481,7 @@
def query(self, s, default_operator=Or):
""" Evaluate a query string.
-
+
Convert the query string into a data structure of nested lists
and strings, based on the grouping of whitespace-separated
strings by parentheses and quotes. The 'Near' operator is
@@ -525,7 +525,7 @@
if operandType is IntType:
left = self[left]
elif operandType is StringType:
- left = self[left]
+ left = self[left]
elif operandType is ListType:
left = self.evaluate(left)
@@ -533,7 +533,7 @@
if operandType is IntType:
right = self[right]
elif operandType is StringType:
- right = self[right]
+ right = self[right]
elif operandType is ListType:
right = self.evaluate(right)
@@ -638,12 +638,12 @@
mo = parens_re(s)
if mo is None:
return
-
+
open_index = mo.start(0) + 1
paren_count = 0
while mo is not None:
index = mo.start(0)
-
+
if s[index] == '(':
paren_count = paren_count + 1
else:
@@ -655,23 +655,23 @@
break
mo = parens_re(s, index + 1)
- raise QueryError, "Mismatched parentheses"
+ raise QueryError, "Mismatched parentheses"
def quotes(s):
split=string.split
if '"' not in s:
return split(s)
-
+
# split up quoted regions
splitted = re.split('\s*\"\s*', s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
-
+
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
-
+
# put the Proxmity operator in between quoted words
j = len(words) - 1
while j > 0:
=== Zope/lib/python/SearchIndex/__init__.py 1.9 => 1.10 ===
--- Zope/lib/python/SearchIndex/__init__.py:1.9 Wed Nov 28 10:51:11 2001
+++ Zope/lib/python/SearchIndex/__init__.py Wed Aug 14 17:46:24 2002
@@ -1,14 +1,14 @@
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
-#
+#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
##############################################################################
__doc__='''Collected utilities to support database indexing.
@@ -23,4 +23,3 @@
\n\
Please use instead the re-factored modules in Products/PluginIndexes.\n\
",DeprecationWarning)
-
=== Zope/lib/python/SearchIndex/randid.py 1.3 => 1.4 ===
--- Zope/lib/python/SearchIndex/randid.py:1.3 Wed Nov 28 11:09:08 2001
+++ Zope/lib/python/SearchIndex/randid.py Wed Aug 14 17:46:24 2002
@@ -1,5 +1,5 @@
##############################################################################
-#
+#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
@@ -8,7 +8,7 @@
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
-#
+#
#############################################################################
import whrandom