[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src - UnicodeSplitter.c:1.13.4.4
Andreas Jung
andreas@digicool.com
Mon, 8 Apr 2002 14:00:25 -0400
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src
In directory cvs.zope.org:/tmp/cvs-serv25599/UnicodeSplitter/src
Modified Files:
Tag: Zope-2_5-branch
UnicodeSplitter.c
Log Message:
The splitter was broken when the default value of the casefolding
parameter was overridden.
=== Zope/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c 1.13.4.3 => 1.13.4.4 ===
int allow_single_chars;
int index_numbers;
+ int casefolding;
}
Splitter;
static
-PyUnicodeObject *prepareString(PyUnicodeObject *o);
+PyUnicodeObject *prepareString(Splitter *self, PyUnicodeObject *o);
static PyObject *checkSynword(Splitter *self, PyObject *word)
{
@@ -203,7 +204,7 @@
int i=0;
int start=0;
- doc1 = prepareString(doc);
+ doc1 = prepareString(self,doc);
if (doc1 == NULL)
return -1;
@@ -299,18 +300,20 @@
static
-PyUnicodeObject *prepareString(PyUnicodeObject *o)
+PyUnicodeObject *prepareString(Splitter *self,PyUnicodeObject *o)
{
PyUnicodeObject *u;
u = (PyUnicodeObject*) PyUnicode_FromUnicode(o->str, o->length);
- if (u != NULL)
- fixlower(u);
+ if (u != NULL){
+ if (self->casefolding)
+ fixlower(u);
+ }
return u;
}
-static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen",NULL};
+static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen","casefolding",NULL};
static PyObject *
@@ -322,8 +325,9 @@
int index_numbers = 0;
int max_len=64;
int single_char = 0;
+ int casefolding=1;
- if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len))) return NULL;
+ if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len,&casefolding))) return NULL;
#ifdef DEBUG
puts("got text");
@@ -336,6 +340,11 @@
return NULL;
}
+ if (casefolding<0 || casefolding>1) {
+ PyErr_SetString(PyExc_ValueError,"casefolding must be 0 or 1");
+ return NULL;
+ }
+
if (single_char<0 || single_char>1) {
PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
return NULL;
@@ -373,6 +382,7 @@
self->index_numbers = index_numbers;
self->max_len = max_len;
self->allow_single_chars = single_char;
+ self->casefolding = casefolding;
if ((splitUnicodeString(self,(PyUnicodeObject *)unicodedoc)) < 0)
goto err;
@@ -391,7 +401,7 @@
{
{ "UnicodeSplitter", (PyCFunction)newSplitter,
METH_VARARGS|METH_KEYWORDS,
- "UnicodeSplitter(doc[,synstop][,encoding='latin1']) "
+ "UnicodeSplitter(doc[,synstop][,encoding='latin1'][,indexnumbers][,maxlen][,singlechar][,casefolding]) "
"-- Return a word splitter"
},
{ NULL, NULL }