[Zope-Checkins] SVN: Zope/trunk/src/Products/ZCTextIndex/ Treat fullwidth space characters defined in Unicode as valid whitespace.
Yusei Tahara
yusei at domen.cx
Wed Feb 3 10:13:48 EST 2010
Log message for revision 108734:
Treat fullwidth space characters defined in Unicode as valid whitespace.
Patch by Manabu TERADA.
Changed:
U Zope/trunk/src/Products/ZCTextIndex/QueryParser.py
U Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py
-=-
Modified: Zope/trunk/src/Products/ZCTextIndex/QueryParser.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/QueryParser.py 2010-02-03 14:15:23 UTC (rev 108733)
+++ Zope/trunk/src/Products/ZCTextIndex/QueryParser.py 2010-02-03 15:13:48 UTC (rev 108734)
@@ -94,6 +94,11 @@
)
""", re.VERBOSE)
+# Use unicode regex to treat fullwidth space characters defined in Unicode
+# as valid whitespace.
+_tokenizer_unicode_regex = re.compile(
+ _tokenizer_regex.pattern, _tokenizer_regex.flags|re.UNICODE)
+
class QueryParser:
implements(IQueryParser)
@@ -109,7 +114,13 @@
def parseQuery(self, query):
# Lexical analysis.
- tokens = _tokenizer_regex.findall(query)
+ try:
+ # Try to use unicode so that fullwidth whitespace is treated as valid whitespace.
+ if not isinstance(query, unicode):
+ query = query.decode('utf-8')
+ tokens = _tokenizer_unicode_regex.findall(query)
+ except UnicodeDecodeError:
+ tokens = _tokenizer_regex.findall(query)
self._tokens = tokens
# classify tokens
self._tokentypes = [_keywords.get(token.upper(), _ATOM)
Modified: Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py 2010-02-03 14:15:23 UTC (rev 108733)
+++ Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py 2010-02-03 15:13:48 UTC (rev 108734)
@@ -210,6 +210,18 @@
self.expect("foo* bar", AndNode([GlobNode("foo*"),
AtomNode("bar")]))
+ def test024(self):
+ # Split by UTF-8 fullwidth space
+ from Products.ZCTextIndex.ParseTree import AndNode
+ from Products.ZCTextIndex.ParseTree import AtomNode
+ self.expect("foo\xe3\x80\x80bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
+
+ def test025(self):
+ # Split by Unicode fullwidth space
+ from Products.ZCTextIndex.ParseTree import AndNode
+ from Products.ZCTextIndex.ParseTree import AtomNode
+ self.expect(u"foo\u3000bar", AndNode([AtomNode(u"foo"), AtomNode(u"bar")]))
+
def test101(self):
self.failure("")
More information about the Zope-Checkins mailing list