[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG - Thesaurus.py:1.1.2.1 BaseProximityLexicon.py:1.1.2.4 BaseStopWords.py:1.1.2.3 BaseThesaurus.py:1.1.2.2 StopWords.py:1.1.2.4 TextIndexNG.py:1.2.2.9 test.py:1.2.2.2
Andreas Jung
andreas@zope.com
Tue, 8 Jan 2002 16:00:05 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG
In directory cvs.zope.org:/tmp/cvs-serv22553
Modified Files:
Tag: ajung-textindexng-branch
BaseProximityLexicon.py BaseStopWords.py BaseThesaurus.py
StopWords.py TextIndexNG.py test.py
Added Files:
Tag: ajung-textindexng-branch
Thesaurus.py
Log Message:
update
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/Thesaurus.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" same as BaseThesaurus but to be used in Zope """

from Persistence import Persistent
from Acquisition import Implicit
from BaseThesaurus import BaseThesaurus


class Thesaurus(Persistent, Implicit, BaseThesaurus):
    """ Thesaurus variant for use inside Zope.

    Adds no attributes or methods of its own: it only combines
    BaseThesaurus with the Persistent and Implicit base classes.
    All thesaurus behaviour is inherited from BaseThesaurus.
    """
    pass
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseProximityLexicon.py 1.1.2.3 => 1.1.2.4 ===
return len(self._lexicon)
-
def query_hook(self, q):
""" we don't want to modify the query cuz we're dumb """
return q
+ def __call__(self,word):
+ return self._v_proximity(word)
+
+
def test():
for a in ('metaphone','soundex'):
@@ -129,8 +132,10 @@
PL = BaseProximityLexicon(a)
- for w in 'the quick brown fox jumps over the '\
- 'lazy dog brown fox'.split():
+ words = 'the quick brown fox jumps over the '\
+ 'lazy dog brown fox'.split()
+
+ for w in words:
wid = PL.getWordId(w)
word = PL.getWord(wid)
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseStopWords.py 1.1.2.2 => 1.1.2.3 ===
#############################################################################
+__version__ = "$Id$"
+
from types import DictType, StringType
from defaultStopWords import stop_word_dict
from BTrees.OOBTree import OOBTree
+import os
+
+_basedir = os.path.dirname(__file__)
class BaseStopWords:
+ """ base class for all StopWord objects """
- def __init__(self, words=stop_word_dict):
+ def __init__(self, words={}):
self._words = OOBTree()
@@ -37,15 +43,25 @@
self.get = self._words.get
- def __del__(self,k): del self._word[k]
+ def __del__(self,k): del self._word[k]
def __len__(self): return len(self._words)
def readStopWords(self, fname):
+ """ read a stopword file (line-by-line) from disk.
+ 'fname' is either relative to ./stopwords/
+ or has an absolute path.
+ """
d = {}
- lines = open(fname).readlines()
+ try:
+ f = os.path.join(_basedir,'stopwords',fname)
+ lines = open(f).readlines()
+ except:
+ try: lines = open(fname).readlines()
+ except: raise
+
for l in lines:
l = l.strip()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/BaseThesaurus.py 1.1.2.1 => 1.1.2.2 ===
#############################################################################
+__version__ = "$Id$"
+
from types import DictType, StringType
from BTrees.OOBTree import OOBTree
+import os
+
+_basedir = os.path.dirname(__file__)
class BaseThesaurus:
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/StopWords.py 1.1.2.3 => 1.1.2.4 ===
##############################################################################
-__doc__=""" same as BaseProximityLexicon but usable for Zope """
+__doc__=""" same as BaseStopWords but to be used in Zope """
from Persistence import Persistent
from Acquisition import Implicit
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/TextIndexNG.py 1.2.2.8 => 1.2.2.9 ===
self.useSplitter = getattr(extra,'useSplitter', 'ZopeSplitter')
+
+ # max length of split words
+ self.splitterMaxLen= getattr(extra,'splitterMaxLen', 64)
+
+ # index numbers
+ self.splitterIndexNumbers = getattr(extra,'splitterIndexNumbers')
+
+ # allow single characters
+ self.splitterSingleChars = getattr(extra,'splitterSingleChars')
+
# name of stemmer or None
self.useStemmer = getattr(extra,'useStemmer', None) or None
@@ -155,10 +165,7 @@
# in the future. This requires some more work on the Python
# Proximity extension
- if self.useProximity=='soundex':
- self._v_proximityfunc = Proximity.soundex
- else:
- raise RuntimeError,'unsupported soundex'
+ self._v_proximityfunc = getattr(Proximity,self.useProximity)
# near Search
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/test.py 1.2.2.1 => 1.2.2.2 ===
from Products.PluginIndexes.TextIndexNG import TextIndexNG
+import ExtensionClass
import os, sys, re,traceback, atexit
import readline
@@ -16,7 +17,7 @@
class extra: pass
-class TO:
+class TO(ExtensionClass.Base):
def __init__(self,txt):
self.text = txt