[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndex - GlobbingLexicon.py:1.5 Lexicon.py:1.4 TextIndex.py:1.21
Andreas Jung
andreas@zope.com
Wed, 17 Oct 2001 12:44:52 -0400
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndex
In directory cvs.zope.org:/tmp/cvs-serv19747
Modified Files:
GlobbingLexicon.py Lexicon.py TextIndex.py
Log Message:
Objects to be indexed with a TextIndex can now define an additional
attribute column+'_encoding' to specify an encoding other than 'latin1'.
This encoding is needed when the UnicodeSplitter is used to convert
a Python string to unicode. Not setting the column+'_encoding' attribute
retains the standard behaviour.
=== Zope/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py 1.4 => 1.5 ===
return q
- def Splitter(self, astring, words=None):
+ def Splitter(self, astring, words=None, encoding="latin1"):
""" wrap the splitter """
## don't do anything, less efficient but there's not much
## sense in stemming a globbing lexicon.
- return self.SplitterFunc(astring)
+ return self.SplitterFunc(astring,encoding=encoding)
def createRegex(self, pat):
=== Zope/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py 1.3 => 1.4 ===
- def Splitter(self, astring, words=None):
+ def Splitter(self, astring, words=None, encoding = "latin1"):
""" wrap the splitter """
if words is None:
words = self.stop_syn
- return self.SplitterFunc(astring, words)
+ return self.SplitterFunc(astring, words, encoding)
def query_hook(self, q):
=== Zope/lib/python/Products/PluginIndexes/TextIndex/TextIndex.py 1.20 => 1.21 ===
except (AttributeError, TypeError):
return 0
+
+ # sniff the object for 'id'+'_encoding'
+
+ try:
+ encoding = getattr(obj, self.id+'_encoding')
+ if callable(encoding ):
+ encoding = str(encoding())
+ else:
+ encoding = str(encoding)
+ except (AttributeError, TypeError):
+ encoding = 'latin1'
+
lexicon = self.getLexicon()
@@ -365,7 +377,7 @@
# Run through the words and score them
- for word in list(splitter(source)):
+ for word in list(splitter(source,encoding=encoding)):
if word[0] == '\"':
last = self._subindex(word[1:-1], wordScores, last, splitter)
else: