[Zope-CVS] CVS: Products/ZCTextIndex/tests - testQueryParser.py:1.5 testZCTextIndex.py:1.29
Guido van Rossum
guido@python.org
Mon, 20 May 2002 09:55:39 -0400
Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv7771/tests
Modified Files:
testQueryParser.py testZCTextIndex.py
Log Message:
Refactor the query parser to rely on the lexicon for parsing terms.
ILexicon.py:
- Added parseTerms() and isGlob().
- Added get_word(), get_wid() (get_word() already existed in the Lexicon implementation; get_wid() is new, for symmetry).
- Reflowed some text.
IQueryParser.py:
- Expanded docs for parseQuery().
- Added getIgnored() and parseQueryEx().
IPipelineElement.py:
- Added processGlob().
Lexicon.py:
- Added parseTerms() and isGlob().
- Added get_wid().
- Some pipeline elements now support processGlob().
ParseTree.py:
- Clarified the error message for calling executeQuery() on a
NotNode.
QueryParser.py (lots of changes):
- Change private names __tokens etc. into protected _tokens etc.
- Add getIgnored() and parseQueryEx() methods.
- The atom parser now uses the lexicon's parseTerms() and isGlob()
methods.
- Query parts that consist only of stopwords (as determined by the
lexicon), or of stopwords and negated terms, yield None instead of
a parse tree node; the ignored term is added to self._ignored.
None is ignored when combining terms for AND/OR/NOT operators, and
when an operator has no non-None operands, the operator itself
returns None. When this None percolates all the way to the top,
the parser raises a ParseError exception.
tests/testQueryParser.py:
- Changed test expressions of the form "a AND b AND c" to "aa AND bb
AND cc" so that the terms won't be considered stopwords.
- The test for "and/" can only work for the base class, because the lexicon pipeline used when testZCTextIndex subclasses it removes stopwords.
tests/testZCTextIndex.py:
- Added copyright notice.
- Refactor testStopWords() to use two helpers: parserSuccess() for
queries that must succeed, and parserFailure() for queries that must fail.
- Change testStopWords() to require parser failure for those queries
that have only stopwords or stopwords plus negated terms.
- Improve compareSet() to sort both lists of keys before comparing, and
to use a more direct way of extracting the keys. This wasn't strictly
needed (nothing fails without it), but the old approach of copying the
keys into a dict in a loop relied on dict hashing always returning the
keys in the same order.
=== Products/ZCTextIndex/tests/testQueryParser.py 1.4 => 1.5 ===
self.expect("foo", AtomNode("foo"))
self.expect("note", AtomNode("note"))
- self.expect("a and b AND c",
- AndNode([AtomNode("a"), AtomNode("b"), AtomNode("c")]))
- self.expect("a OR b or c",
- OrNode([AtomNode("a"), AtomNode("b"), AtomNode("c")]))
- self.expect("a AND b OR c AnD d",
- OrNode([AndNode([AtomNode("a"), AtomNode("b")]),
- AndNode([AtomNode("c"), AtomNode("d")])]))
- self.expect("(a OR b) AND (c OR d)",
- AndNode([OrNode([AtomNode("a"), AtomNode("b")]),
- OrNode([AtomNode("c"), AtomNode("d")])]))
- self.expect("a AND not b",
- AndNode([AtomNode("a"), NotNode(AtomNode("b"))]))
+ self.expect("aa and bb AND cc",
+ AndNode([AtomNode("aa"), AtomNode("bb"), AtomNode("cc")]))
+ self.expect("aa OR bb or cc",
+ OrNode([AtomNode("aa"), AtomNode("bb"), AtomNode("cc")]))
+ self.expect("aa AND bb OR cc AnD dd",
+ OrNode([AndNode([AtomNode("aa"), AtomNode("bb")]),
+ AndNode([AtomNode("cc"), AtomNode("dd")])]))
+ self.expect("(aa OR bb) AND (cc OR dd)",
+ AndNode([OrNode([AtomNode("aa"), AtomNode("bb")]),
+ OrNode([AtomNode("cc"), AtomNode("dd")])]))
+ self.expect("aa AND not bb",
+ AndNode([AtomNode("aa"), NotNode(AtomNode("bb"))]))
self.expect('"foo bar"', PhraseNode("foo bar"))
self.expect("foo bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
@@ -80,7 +80,10 @@
self.expect('(("foo bar"))"', PhraseNode("foo bar"))
self.expect("((foo bar))", AndNode([AtomNode("foo"), AtomNode("bar")]))
- self.expect('and/', AtomNode("and"))
+ if self.__class__ is TestQueryParser:
+ # This test fails when testZCTextIndex subclasses this class,
+ # because its lexicon's pipeline removes stopwords
+ self.expect('and/', AtomNode("and"))
self.expect("foo-bar", PhraseNode("foo bar"))
self.expect("foo -bar", AndNode([AtomNode("foo"),
=== Products/ZCTextIndex/tests/testZCTextIndex.py 1.28 => 1.29 ===
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
from Products.ZCTextIndex.tests \
import testIndex, testQueryEngine, testQueryParser
@@ -9,6 +23,7 @@
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.StopDict import get_stopdict
+from Products.ZCTextIndex.ParseTree import ParseError
import re
import unittest
@@ -84,6 +99,15 @@
self.index = self.zc_index.index
self.lexicon = self.zc_index.lexicon
+ def parserFailure(self, query):
+ self.assertRaises(ParseError, self.zc_index.query, query)
+
+ def parserSuccess(self, query, n):
+ r, num = self.zc_index.query(query)
+ self.assertEqual(num, n)
+ if n:
+ self.assertEqual(r[0][0], 1)
+
def testStopWords(self):
# the only non-stopword is question
text = ("to be or not to be "
@@ -96,61 +120,23 @@
self.assertEqual(wids, [])
self.assertEqual(len(self.index.get_words(1)), 1)
- r, num = self.zc_index.query('question')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('question AND to AND be')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('to AND question AND be')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('to AND NOT question')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('to AND NOT gardenia')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('question AND NOT gardenia')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('question AND gardenia')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('gardenia')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('question OR gardenia')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('question AND NOT to AND NOT be')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('question OR to OR be')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('question to be')
- self.assertEqual(num, 1)
- self.assertEqual(r[0][0], 1)
-
- r, num = self.zc_index.query('to be')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('to AND be')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('to OR be')
- self.assertEqual(num, 0)
-
- r, num = self.zc_index.query('to AND NOT be')
- self.assertEqual(num, 0)
+ self.parserSuccess('question', 1)
+ self.parserSuccess('question AND to AND be', 1)
+ self.parserSuccess('to AND question AND be', 1)
+ self.parserSuccess('question AND NOT gardenia', 1)
+ self.parserSuccess('question AND gardenia', 0)
+ self.parserSuccess('gardenia', 0)
+ self.parserSuccess('question OR gardenia', 1)
+ self.parserSuccess('question AND NOT to AND NOT be', 1)
+ self.parserSuccess('question OR to OR be', 1)
+ self.parserSuccess('question to be', 1)
+
+ self.parserFailure('to be')
+ self.parserFailure('to AND be')
+ self.parserFailure('to OR be')
+ self.parserFailure('to AND NOT be')
+ self.parserFailure('to AND NOT question')
+ self.parserFailure('to AND NOT gardenia')
def testDocUpdate(self):
docid = 1 # doesn't change -- we index the same doc repeatedly
@@ -482,10 +468,11 @@
# XXX The FauxIndex and the real Index score documents very
# differently. The set comparison can't actually compare the
# items, but it can compare the keys. That will have to do for now.
- d = {}
- for k, v in set.items():
- d[k] = v
- self.assertEqual(d.keys(), dict.keys())
+ setkeys = list(set.keys())
+ dictkeys = dict.keys()
+ setkeys.sort()
+ dictkeys.sort()
+ self.assertEqual(setkeys, dictkeys)
class CosineQueryTests(QueryTestsBase):
IndexFactory = CosineIndex