[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - GlobbingLexiconNG.py:1.1.2.3
Andreas Jung
andreas@digicool.com
Wed, 9 Jan 2002 19:25:51 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv22399
Modified Files:
Tag: ajung-textindexng-branch
GlobbingLexiconNG.py
Log Message:
Re-arranged the code and cleaned it up; it should be somewhat faster now.
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/GlobbingLexiconNG.py 1.1.2.2 => 1.1.2.3 ===
from LexiconNG import LexiconNG
from Products.PluginIndexes.TextIndex.randid import randid
+from types import ListType
from BTrees.IIBTree import IISet, union, IITreeSet
from BTrees.OIBTree import OIBTree
@@ -51,6 +52,9 @@
single_wc = '?'
eow = '$'
+ wc_set = [self.multi_wc, self.single_wc]
+ glob_reg = re.compile('[\?\*]')
+
def __init__(self):
self.clear()
@@ -128,22 +132,32 @@
# single word pattern produce a slicing problem below.
# Because the splitter throws away single characters we can
# return an empty tuple here.
-
+
+ # FIXME: the splitter can now produce single-character words, so
+ # returning an empty tuple for one-character patterns is no longer correct.
if len(pattern)==1: return ()
- wc_set = [self.multi_wc, self.single_wc]
+ # no globbing
+ if not glob_reg.match(pattern):
+
+ result = self._lexicon.get(pattern, None)
+
+ if result is None: return ()
+ else: return (result, )
+
+
+ # we are in globbing country
digrams = []
- globbing = 0
+
for i in range(len(pattern)):
- if pattern[i] in wc_set:
- globbing = 1
- continue
if i == 0:
digrams.insert(i, (self.eow + pattern[i]) )
digrams.append((pattern[i] + pattern[i+1]))
+
else:
+
try:
if pattern[i+1] not in wc_set:
digrams.append( pattern[i] + pattern[i+1] )
@@ -151,20 +165,19 @@
except IndexError:
digrams.append( (pattern[i] + self.eow) )
- if not globbing:
- result = self._lexicon.get(pattern, None)
- if result is None:
- return ()
- return (result, )
-
+
## now get all of the intsets that contain the result digrams
+
result = None
+
for digram in digrams:
result=union(result, self._digrams.get(digram, None))
if not result:
return ()
+
else:
+
## now we have narrowed the list of possible candidates
## down to those words which contain digrams. However,
## some words may have been returned that match digrams,
@@ -175,9 +188,11 @@
expr = re.compile(self.createRegex(pattern))
words = []
hits = IISet()
+
for x in result:
if expr.match(self._inverseLex[x]):
hits.insert(x)
+
return hits
@@ -188,8 +203,9 @@
def query_hook(self, q):
"""expand wildcards"""
- ListType = type([])
+
i = len(q) - 1
+
while i >= 0:
e = q[i]
if isinstance(e, ListType):