[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - TextIndexNG.py:1.2.2.16
Andreas Jung
andreas@digicool.com
Mon, 21 Jan 2002 15:12:18 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv28022
Modified Files:
Tag: ajung-textindexng-branch
TextIndexNG.py
Log Message:
- code cleanup
- incorporated all splitter extensions
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.15 => 1.2.2.16 ===
self.useSplitter = getattr(extra,'useSplitter', 'ZopeSplitter')
-
# max length of split words
self.splitterMaxLen= getattr(extra,'splitterMaxLen', 64)
@@ -109,6 +108,9 @@
# allow single characters
self.splitterSingleChars = getattr(extra,'splitterSingleChars',0)
+ # perform casefolding (lower-casing) of words: 1/0
+ self.splitterCasefolding = getattr(extra,'splitterCasefolding',1)
+
# name of stemmer or None
self.useStemmer = getattr(extra,'useStemmer', None) or None
@@ -121,17 +123,14 @@
# lexicon to be used (name, reference or None(internal))
self.lexicon = getattr(extra,'lexicon', None) or None
- # support near search: 1/0 (requires more storage)
- self.useNearSearch = getattr(extra,'useNearSearch', 1)
-
# default maximum distance for words with near search
self.nearDistance = getattr(extra,'nearDistance', 5)
# use proximity algorithm
self.useProximity = getattr(extra,'useProximity', None) or None
- # storage of positions for near search ('internal','documentLookup')
- self.nearStorage = getattr(extra,'nearStorage', 'internal')
+ # Support for near search (None,'internal','documentLookup')
+ self.nearSearch = getattr(extra,'nearSearch', None)
# Stopwords: either filename or StopWord object
self.stopWords = getattr(extra,'stopWords', None) or None
@@ -139,11 +138,10 @@
# Thesaurus: either filename or StopWord object
self.thesaurus = getattr(extra,'thesaurus', None) or None
- if not self.nearStorage in ('internal','documentLookup'):
- raise ValueError,'nearStorage must be either "internal"'\
+ if not self.nearSearch in (None,'internal','documentLookup'):
+ raise ValueError,'nearSearch must be either None, "internal"'\
' or "documentLookup"'
-
self.clear()
@@ -188,7 +186,7 @@
# near Search
- if self.nearStorage == 'internal':
+ if self.nearSearch == 'internal':
self._v_positions = self.positionsFromInternalStorage
self._v_insertForwardEntry = self.insertForwardEntryInternal
else:
@@ -375,7 +373,13 @@
# Split the text into a list of words
# The splitterfunc just returns an iterator-like object.
- words = self._v_splitterfunc(source,encoding=encoding).split()
+ words = self._v_splitterfunc(source,
+ encoding = encoding,
+ casefolding = self.splitterCasefolding,
+ maxlen = self.splitterMaxLen,
+ indexnumbers = self.splitterIndexNumbers,
+ singlechar = self.splitterSingleChars
+ ).split()
T("Splitter")
# apply stopwords list
@@ -515,6 +519,11 @@
word = self._v_stemmerfunc(word)
debug("\tStemming: ", word)
+ # perform casefolding if necessary
+ if self.splitterCasefolding:
+ word = word.lower()
+ debug('\tCasefolding: ',word)
+
# Lookup list of wordIds (usually should contain only *one*)
wids = self._v_getIdByWord(word)
debug("\tWids: ", wids)
@@ -526,8 +535,9 @@
# (documentId, list of positions) for one word/wid
docIds = self._IDX.get(wids[0])
- debug('\tDocIds: ', list(docIds.keys()))
- debug('\tPositions: ', list(docIds.values()))
+
+# debug('\tDocIds: ', list(docIds.keys()))
+# debug('\tPositions: ', list(docIds.values()))
r = ResultSet( docIds, (word,))