[Zope-CVS] CVS: Products/ZCTextIndex/tests - testQueryParser.py:1.5 testZCTextIndex.py:1.29

Guido van Rossum guido@python.org
Mon, 20 May 2002 09:55:39 -0400


Update of /cvs-repository/Products/ZCTextIndex/tests
In directory cvs.zope.org:/tmp/cvs-serv7771/tests

Modified Files:
	testQueryParser.py testZCTextIndex.py 
Log Message:
Refactor the query parser to rely on the lexicon for parsing terms.

ILexicon.py:

  - Added parseTerms() and isGlob().

  - Added get_word() and get_wid() (get_word() is an existing method;
    get_wid() is new, added for symmetry).

  - Reflowed some text.

IQueryParser.py:

  - Expanded docs for parseQuery().

  - Added getIgnored() and parseQueryEx().

IPipelineElement.py:

  - Added processGlob().

Lexicon.py:

  - Added parseTerms() and isGlob().

  - Added get_wid().

  - Some pipeline elements now support processGlob().

ParseTree.py:

  - Clarified the error message for calling executeQuery() on a
    NotNode.

QueryParser.py (lots of changes):

  - Change private names __tokens etc. into protected _tokens etc.

  - Add getIgnored() and parseQueryEx() methods.

  - The atom parser now uses the lexicon's parseTerms() and isGlob()
    methods.

  - Query parts that consist only of stopwords (as determined by the
    lexicon), or of stopwords and negated terms, yield None instead of
    a parse tree node; the ignored term is added to self._ignored.
    None is ignored when combining terms for AND/OR/NOT operators, and
    when an operator has no non-None operands, the operator itself
    returns None.  When this None percolates all the way to the top,
    the parser raises a ParseError exception.

tests/testQueryParser.py:

  - Changed test expressions of the form "a AND b AND c" to "aa AND bb
    AND cc" so that the terms won't be considered stopwords.

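  - The test for "and/" now runs only for the base class; it fails when
    testZCTextIndex subclasses this class, because that lexicon's
    pipeline removes stopwords.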

tests/testZCTextIndex.py:

  - Added copyright notice.

  - Refactor testStopWords() to use two helpers: one for queries that
    are expected to succeed, and one for queries that are expected to
    raise ParseError.

  - Change testStopWords() to require parser failure for those queries
    that have only stopwords or stopwords plus negated terms.

  - Improve compareSet() to sort both lists of keys before comparing
    them, and use a more direct way of extracting the keys.  This
    wasn't strictly needed (nothing fails without it), but the old
    approach of copying the items into a dict in a loop and comparing
    the unsorted key lists depends on dict hashing always returning
    keys in the same order.



=== Products/ZCTextIndex/tests/testQueryParser.py 1.4 => 1.5 ===
         self.expect("foo", AtomNode("foo"))
         self.expect("note", AtomNode("note"))
-        self.expect("a and b AND c",
-                    AndNode([AtomNode("a"), AtomNode("b"), AtomNode("c")]))
-        self.expect("a OR b or c",
-                    OrNode([AtomNode("a"), AtomNode("b"), AtomNode("c")]))
-        self.expect("a AND b OR c AnD d",
-                    OrNode([AndNode([AtomNode("a"), AtomNode("b")]),
-                            AndNode([AtomNode("c"), AtomNode("d")])]))
-        self.expect("(a OR b) AND (c OR d)",
-                    AndNode([OrNode([AtomNode("a"), AtomNode("b")]),
-                             OrNode([AtomNode("c"), AtomNode("d")])]))
-        self.expect("a AND not b",
-                    AndNode([AtomNode("a"), NotNode(AtomNode("b"))]))
+        self.expect("aa and bb AND cc",
+                    AndNode([AtomNode("aa"), AtomNode("bb"), AtomNode("cc")]))
+        self.expect("aa OR bb or cc",
+                    OrNode([AtomNode("aa"), AtomNode("bb"), AtomNode("cc")]))
+        self.expect("aa AND bb OR cc AnD dd",
+                    OrNode([AndNode([AtomNode("aa"), AtomNode("bb")]),
+                            AndNode([AtomNode("cc"), AtomNode("dd")])]))
+        self.expect("(aa OR bb) AND (cc OR dd)",
+                    AndNode([OrNode([AtomNode("aa"), AtomNode("bb")]),
+                             OrNode([AtomNode("cc"), AtomNode("dd")])]))
+        self.expect("aa AND not bb",
+                    AndNode([AtomNode("aa"), NotNode(AtomNode("bb"))]))
 
         self.expect('"foo bar"', PhraseNode("foo bar"))
         self.expect("foo bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
@@ -80,7 +80,10 @@
         self.expect('(("foo bar"))"', PhraseNode("foo bar"))
         self.expect("((foo bar))", AndNode([AtomNode("foo"), AtomNode("bar")]))
 
-        self.expect('and/', AtomNode("and"))
+        if self.__class__ is TestQueryParser:
+            # This test fails when testZCTextIndex subclasses this class,
+            # because its lexicon's pipeline removes stopwords
+            self.expect('and/', AtomNode("and"))
 
         self.expect("foo-bar", PhraseNode("foo bar"))
         self.expect("foo -bar", AndNode([AtomNode("foo"),


=== Products/ZCTextIndex/tests/testZCTextIndex.py 1.28 => 1.29 ===
+#
+# Copyright (c) 2002 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
 from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
 from Products.ZCTextIndex.tests \
      import testIndex, testQueryEngine, testQueryParser
@@ -9,6 +23,7 @@
 from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
 from Products.ZCTextIndex.QueryParser import QueryParser
 from Products.ZCTextIndex.StopDict import get_stopdict
+from Products.ZCTextIndex.ParseTree import ParseError
 
 import re
 import unittest
@@ -84,6 +99,15 @@
         self.index = self.zc_index.index
         self.lexicon = self.zc_index.lexicon
 
+    def parserFailure(self, query):
+        self.assertRaises(ParseError, self.zc_index.query, query)
+
+    def parserSuccess(self, query, n):
+        r, num = self.zc_index.query(query)
+        self.assertEqual(num, n)
+        if n:
+            self.assertEqual(r[0][0], 1)
+
     def testStopWords(self):
         # the only non-stopword is question
         text = ("to be or not to be "
@@ -96,61 +120,23 @@
                 self.assertEqual(wids, [])
         self.assertEqual(len(self.index.get_words(1)), 1)
 
-        r, num = self.zc_index.query('question')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('question AND to AND be')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('to AND question AND be')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('to AND NOT question')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('to AND NOT gardenia')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('question AND NOT gardenia')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('question AND gardenia')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('gardenia')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('question OR gardenia')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('question AND NOT to AND NOT be')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('question OR to OR be')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('question to be')
-        self.assertEqual(num, 1)
-        self.assertEqual(r[0][0], 1)
-
-        r, num = self.zc_index.query('to be')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('to AND be')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('to OR be')
-        self.assertEqual(num, 0)
-
-        r, num = self.zc_index.query('to AND NOT be')
-        self.assertEqual(num, 0)
+        self.parserSuccess('question', 1)
+        self.parserSuccess('question AND to AND be', 1)
+        self.parserSuccess('to AND question AND be', 1)
+        self.parserSuccess('question AND NOT gardenia', 1)
+        self.parserSuccess('question AND gardenia', 0)
+        self.parserSuccess('gardenia', 0)
+        self.parserSuccess('question OR gardenia', 1)
+        self.parserSuccess('question AND NOT to AND NOT be', 1)
+        self.parserSuccess('question OR to OR be', 1)
+        self.parserSuccess('question to be', 1)
+
+        self.parserFailure('to be')
+        self.parserFailure('to AND be')
+        self.parserFailure('to OR be')
+        self.parserFailure('to AND NOT be')
+        self.parserFailure('to AND NOT question')
+        self.parserFailure('to AND NOT gardenia')
 
     def testDocUpdate(self):
         docid = 1   # doesn't change -- we index the same doc repeatedly
@@ -482,10 +468,11 @@
         # XXX The FauxIndex and the real Index score documents very
         # differently.  The set comparison can't actually compare the
         # items, but it can compare the keys.  That will have to do for now.
-        d = {}
-        for k, v in set.items():
-            d[k] = v
-        self.assertEqual(d.keys(), dict.keys())
+        setkeys = list(set.keys())
+        dictkeys = dict.keys()
+        setkeys.sort()
+        dictkeys.sort()
+        self.assertEqual(setkeys, dictkeys)
 
 class CosineQueryTests(QueryTestsBase):
     IndexFactory = CosineIndex