[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src - normalizer.c:1.1.2.3
Andreas Jung
andreas@digicool.com
Sat, 9 Feb 2002 10:28:36 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src
In directory cvs.zope.org:/tmp/cvs-serv1404/src
Modified Files:
Tag: ajung-textindexng-branch
normalizer.c
Log Message:
YET another rewrite:
- added an optional 'encoding' parameter to override the default encoding
- all strings are now converted to Unicode according to the encoding parameter
(default: latin1).
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/normalizer.c 1.1.2.2 => 1.1.2.3 ===
-
typedef struct
{
PyObject_HEAD
PyObject * table;
+ char *encoding;
}
normalizer;
@@ -16,36 +16,53 @@
PyMem_DEL(self);
}
-static PyUnicodeObject *NormalizeWord(normalizer *self,PyUnicodeObject *word) {
+static PyObject *NormalizeWord(normalizer *self,PyObject *word)
+{
- PyUnicodeObject *temp;
int i;
+ PyObject *temp;
- temp = (PyUnicodeObject *) PyUnicode_FromUnicode(word->str,word->length);
+ if (PyString_Check(word)) {
+ if (! (temp = PyUnicode_FromEncodedObject(word,self->encoding,"strict"))) {
+ PyErr_SetString(PyExc_UnicodeError,"unicode conversion failed");
+ return NULL;
+ }
+ } else {
+ temp = PyUnicode_FromObject(word);
+ }
for (i=0; i<PyList_GET_SIZE(self->table); i++) {
- PyUnicodeObject *s;
- PyObject *item, *key, *value;
+ PyObject *s, *item, *key, *value;
item = PyList_GetItem(self->table, i);
key = PyTuple_GetItem(item,0);
value = PyTuple_GetItem(item,1);
- s = (PyUnicodeObject *) PyUnicode_Replace((PyObject *) temp, key, value, -1);
+ if (PyString_Check(key))
+ key = PyUnicode_FromEncodedObject(key, self->encoding,"strict");
+
+ if (PyString_Check(value))
+ value = PyUnicode_FromEncodedObject(value, self->encoding,"strict");
+
+ if (! (s = PyUnicode_Replace( temp, key, value, -1)))
+ return NULL;
+
Py_DECREF(temp);
+
temp = s;
}
return temp;
}
-static PyObject *normalize(normalizer *self, PyObject*args)
+static PyObject *normalize(normalizer *self, PyObject *args)
{
int j;
PyObject * data=NULL ;
- if (! (PyArg_ParseTuple(args,"O", &data))) return NULL;
+ if (! (PyArg_ParseTuple(args,"O", &data)))
+ return NULL;
if (PyList_Check(data)) {
@@ -54,47 +71,25 @@
list = PyList_New(0);
for (j=0; j<PyList_Size(data); j++) {
- PyUnicodeObject *word=NULL,*item=NULL;
-
- item = (PyUnicodeObject *) PyList_GetItem(data,j);
-
- if (PyUnicode_Check(item)) {
- word = NormalizeWord(self,item);
- } else if (PyString_Check(item)) {
- PyUnicodeObject *unicodeword;
-
- unicodeword = (PyUnicodeObject *) PyUnicode_FromObject((PyObject *) item);
+ PyObject *word=NULL,*item=NULL;
- word = NormalizeWord(self,unicodeword);
+ item = PyList_GetItem(data,j);
- Py_DECREF(unicodeword);
- }
+ word = NormalizeWord(self, item);
PyList_Append(list, (PyObject *) word);
- Py_DECREF(word);
}
Py_DECREF(data);
return list;
- } else if (PyUnicode_Check(data)) {
+ } else if (PyUnicode_Check(data) || PyString_Check(data) ) {
- PyUnicodeObject *word;
+ PyObject *word;
- word = NormalizeWord(self, (PyUnicodeObject *) data);
-
- return (PyObject *) word;
-
- } else if (PyString_Check(data)) {
-
- PyUnicodeObject *word,*unicodeword;
-
- unicodeword = (PyUnicodeObject *) PyUnicode_FromObject(data);
-
- word = NormalizeWord(self,unicodeword);
-
- Py_DECREF(unicodeword);
+ if (! (word = NormalizeWord(self,data)))
+ return NULL;
return (PyObject *) word;
@@ -125,7 +120,6 @@
}
if (PyTuple_Size(item) != 2) {
-
PyErr_SetString(PyExc_TypeError,"nested arguments must be 2-tuples of strings/unicode strings");
goto err;
}
@@ -171,40 +165,43 @@
static char normalizerType__doc__[] = "normalizer object";
static PyTypeObject normalizerType = {
- PyObject_HEAD_INIT(NULL)
- 0, /*ob_size*/
- "normalizer", /*tp_name*/
- sizeof(normalizer), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- /* methods */
- (destructor)normalizer_dealloc, /*tp_dealloc*/
- (printfunc)0, /*tp_print*/
- (getattrfunc)normalizer_getattr, /*tp_getattr*/
- (setattrfunc)0, /*tp_setattr*/
- (cmpfunc)0, /*tp_compare*/
- (reprfunc)0, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- (hashfunc)0, /*tp_hash*/
- (ternaryfunc)0, /*tp_call*/
- (reprfunc)0, /*tp_str*/
-
- /* Space for future expansion */
- 0L,0L,0L,0L,
- normalizerType__doc__ /* Documentation string */
-};
+ PyObject_HEAD_INIT(NULL)
+ 0, /*ob_size*/
+ "normalizer", /*tp_name*/
+ sizeof(normalizer), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ (destructor)normalizer_dealloc, /*tp_dealloc*/
+ (printfunc)0, /*tp_print*/
+ (getattrfunc)normalizer_getattr, /*tp_getattr*/
+ (setattrfunc)0, /*tp_setattr*/
+ (cmpfunc)0, /*tp_compare*/
+ (reprfunc)0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ (hashfunc)0, /*tp_hash*/
+ (ternaryfunc)0, /*tp_call*/
+ (reprfunc)0, /*tp_str*/
+
+ /* Space for future expansion */
+ 0L,0L,0L,0L,
+ normalizerType__doc__ /* Documentation string */
+ };
+
+static char *normalizer_args[]={"translation","encoding",NULL};
static PyObject *
newnormalizer(PyObject *modinfo, PyObject *args, PyObject *keywds)
{
normalizer *self=NULL;
PyObject *table;
+ static char * encoding = "latin1";
-
- if (! (PyArg_ParseTuple(args,"O", &table)))
+ if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|s",normalizer_args,&table,&encoding)))
return NULL;
+
if (! checkList(table))
return NULL;
@@ -212,6 +209,8 @@
return NULL;
self->table = table;
+ self->encoding = encoding;
+
Py_INCREF(self->table);
return (PyObject*)self;
@@ -219,7 +218,7 @@
static struct PyMethodDef normalizer_module_methods[] =
{
- { "Normalizer", (PyCFunction)newnormalizer, METH_VARARGS,
+ { "Normalizer", (PyCFunction)newnormalizer, METH_VARARGS|METH_KEYWORDS,
"Normalizer(list) " "-- Normalizer module - takes a list of 2-tuples of strings/unicode strings"
},
{ NULL, NULL }