[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - GlobbingLexiconNG.py:1.1.2.3

Andreas Jung andreas@digicool.com
Wed, 9 Jan 2002 19:25:51 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv22399

Modified Files:
      Tag: ajung-textindexng-branch
	GlobbingLexiconNG.py 
Log Message:
Re-arranged and cleaned up the code. It should be somewhat faster now.


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/GlobbingLexiconNG.py 1.1.2.2 => 1.1.2.3 ===
 from LexiconNG import LexiconNG
 from Products.PluginIndexes.TextIndex.randid import randid
+from types import ListType
 
 from BTrees.IIBTree import IISet, union, IITreeSet
 from BTrees.OIBTree import OIBTree
@@ -51,6 +52,9 @@
     single_wc = '?'
     eow = '$'
 
+    wc_set = [self.multi_wc, self.single_wc]
+    glob_reg = re.compile('[\?\*]')
+
 
     def __init__(self):
         self.clear()
@@ -128,22 +132,32 @@
         # single word pattern  produce a slicing problem below.
         # Because the splitter throws away single characters we can
         # return an empty tuple here.
-
+       
+        # This NEEDS to be fixed because the splitter can now produce
+        # single characters
         if len(pattern)==1: return ()
 
-        wc_set = [self.multi_wc, self.single_wc]
+        # no globbing
+        if not glob_reg.match(pattern):
+
+            result =  self._lexicon.get(pattern, None)
+
+            if result is None:  return ()
+            else:               return (result, )
+
+
+        # we are in globbing country
 
         digrams = []
-        globbing = 0
+
         for i in range(len(pattern)):
-            if pattern[i] in wc_set:
-                globbing = 1
-                continue
 
             if i == 0:
                 digrams.insert(i, (self.eow + pattern[i]) )
                 digrams.append((pattern[i] + pattern[i+1]))
+
             else:
+
                 try:
                     if pattern[i+1] not in wc_set:
                         digrams.append( pattern[i] + pattern[i+1] )
@@ -151,20 +165,19 @@
                 except IndexError:
                     digrams.append( (pattern[i] + self.eow) )
 
-        if not globbing:
-            result =  self._lexicon.get(pattern, None)
-            if result is None:
-                return ()
-            return (result, )
-        
+
         ## now get all of the intsets that contain the result digrams
+
         result = None
+
         for digram in digrams:
             result=union(result, self._digrams.get(digram, None))
 
         if not result:
             return ()
+
         else:
+
             ## now we have narrowed the list of possible candidates
             ## down to those words which contain digrams.  However,
             ## some words may have been returned that match digrams,
@@ -175,9 +188,11 @@
             expr = re.compile(self.createRegex(pattern))
             words = []
             hits = IISet()
+
             for x in result:
                 if expr.match(self._inverseLex[x]):
                     hits.insert(x)
+
             return hits
 
                 
@@ -188,8 +203,9 @@
 
     def query_hook(self, q):
         """expand wildcards"""
-        ListType = type([])
+
         i = len(q) - 1
+
         while i >= 0:
             e = q[i]
             if isinstance(e, ListType):