[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/src - Stemmer.c:1.1.2.1
Andreas Jung
andreas@digicool.com
Wed, 13 Feb 2002 11:26:28 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/src
In directory cvs.zope.org:/tmp/cvs-serv30556/PyStemmer/src
Added Files:
Tag: ajung-textindexng-branch
Stemmer.c
Log Message:
added PyStemmer
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/src/Stemmer.c ===
#include "Python.h"
#include "german/stem.h"
#include "french/stem.h"
#include "porter/stem.h"
#include "english/stem.h"
#include "dutch/stem.h"
#include "spanish/stem.h"
#include "portuguese/stem.h"
#include "swedish/stem.h"
#include "italian/stem.h"
#include "russian/stem.h"
#include "danish/stem.h"
#include "norwegian/stem.h"
#include "header.h"
/*
 * Instance layout of a Stemmer object.
 *
 * One instance wraps a single stemming environment together with the
 * function that runs the stemmer on it, plus a dictionary that remembers
 * already stemmed words.
 */
typedef struct {
    PyObject_HEAD
    PyObject *cache;                    /* dict: word -> stemmed word */
    int cache_size;                     /* value behind getCacheSize()/setCacheSize() */
    struct SN_env *env;                 /* environment handed to stem_func */
    char *language;                     /* malloc'ed copy of the language name */
    int (*stem_func)(struct SN_env *);  /* language specific stem routine */
} Stemmer;
/*
 * Destructor for Stemmer objects.
 *
 * Closes the stemming environment with the close routine that belongs to
 * the language the object was created for, then releases the language
 * name, the cache dictionary and finally the object itself.
 *
 * The function tolerates a partially initialised object: a NULL language
 * name skips the environment shutdown and a NULL cache is ignored.
 */
static void
Stemmer_dealloc(Stemmer *self)
{
    const char *lang = self->language;

    if (lang != NULL) {
        if (strcmp(lang, "porter") == 0) {
            porter_close_env(self->env);
        } else if (strcmp(lang, "german") == 0) {
            german_close_env(self->env);
        } else if (strcmp(lang, "french") == 0) {
            french_close_env(self->env);
        } else if (strcmp(lang, "dutch") == 0) {
            dutch_close_env(self->env);
        } else if (strcmp(lang, "spanish") == 0) {
            spanish_close_env(self->env);
        } else if (strcmp(lang, "english") == 0) {
            english_close_env(self->env);
        } else if (strcmp(lang, "swedish") == 0) {
            swedish_close_env(self->env);
        } else if (strcmp(lang, "italian") == 0) {
            italian_close_env(self->env);
        } else if (strcmp(lang, "portuguese") == 0) {
            portuguese_close_env(self->env);
        } else if (strcmp(lang, "danish") == 0) {
            danish_close_env(self->env);
        } else if (strcmp(lang, "russian") == 0) {
            russian_close_env(self->env);
        } else if (strcmp(lang, "norwegian") == 0) {
            norwegian_close_env(self->env);
        }
        /* Any other name means no environment was ever created. */
    }

    free(self->language);

    /* The cache may be NULL if creating the dictionary failed. */
    Py_XDECREF(self->cache);

    /* The object came from PyObject_NEW, so it must go back through the
       object allocator rather than the raw memory allocator. */
    PyObject_DEL(self);
}
/*
 * availableStemmers() -- module level function.
 *
 * Returns a new, sorted list holding the names of all stemmers that can be
 * passed to Stemmer().  The arguments are not inspected.
 *
 * Returns NULL with an exception set if the list or one of its items
 * cannot be created.
 */
static PyObject *Stemmer_availableStemmers(Stemmer *self, PyObject *args)
{
    static const char *const names[] = {
        "german", "french", "porter", "english", "dutch", "spanish",
        "portuguese", "swedish", "italian", "russian", "danish", "norwegian"
    };
    const int count = (int) (sizeof names / sizeof names[0]);
    PyObject *list;
    int i;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    for (i = 0; i < count; i++) {
        PyObject *name = PyString_FromString(names[i]);
        int status;

        if (name == NULL) {
            Py_DECREF(list);
            return NULL;
        }

        /* PyList_Append takes its own reference, so ours must be dropped
           afterwards or every name would be leaked. */
        status = PyList_Append(list, name);
        Py_DECREF(name);
        if (status < 0) {
            Py_DECREF(list);
            return NULL;
        }
    }

    if (PyList_Sort(list) < 0) {
        Py_DECREF(list);
        return NULL;
    }
    return list;
}
/*
 * language() -- returns the language name stored in the stemmer object as
 * a new Python string.  The arguments are not inspected.
 */
static PyObject *Stemmer_language(Stemmer *self, PyObject *args)
{
    return PyString_FromString(self->language);
}
/*
 * getCacheSize() -- returns the cache_size field of the stemmer object as
 * a new Python integer.
 */
static PyObject *Stemmer_getCacheSize(Stemmer *self)
{
    long current = (long) self->cache_size;

    return PyInt_FromLong(current);
}
/*
 * setCacheSize(n) -- stores the integer n in the cache_size field and
 * returns the new value via Stemmer_getCacheSize().
 *
 * Returns NULL with an exception set if the argument is not one integer.
 */
static PyObject *Stemmer_setCacheSize(Stemmer *self, PyObject *args)
{
    int requested;

    if (PyArg_ParseTuple(args, "i", &requested)) {
        self->cache_size = requested;
        return Stemmer_getCacheSize(self);
    }
    return NULL;
}
/*
 * Stem a single word.
 *
 * pyword must be a Python string.  The cache dictionary is consulted
 * first; on a miss the word is run through the stemmer and the result is
 * stored in the cache under the original word.
 *
 * Returns a new reference to the stemmed string, or NULL with an exception
 * set on failure.
 *
 * NOTE(review): cache_size is never consulted here, so the cache grows
 * without limit -- confirm whether a size bound was intended.
 */
static PyObject *stem_word(Stemmer *self, PyObject *pyword)
{
    PyObject *stemmed;
    char *word;

    stemmed = PyDict_GetItem(self->cache, pyword);
    if (stemmed != NULL) {
        /* PyDict_GetItem hands out a borrowed reference. */
        Py_INCREF(stemmed);
        return stemmed;
    }

    word = PyString_AsString(pyword);
    if (word == NULL)
        return NULL;

    SN_set_current(self->env, strlen(word), word);
    self->stem_func(self->env);

    /* The result occupies the first l symbols of p; building the string
       from an explicit length avoids writing a terminator into the
       stemmer's buffer. */
    stemmed = PyString_FromStringAndSize((char *) self->env->p,
                                         self->env->l);
    if (stemmed == NULL)
        return NULL;

    /* The dictionary takes its own reference; the one created above is
       the one returned to the caller, so no extra INCREF is needed. */
    if (PyDict_SetItem(self->cache, pyword, stemmed) < 0) {
        Py_DECREF(stemmed);
        return NULL;
    }
    return stemmed;
}
/*
 * stem(data) -- stem a word or a list of words.
 *
 * data is either a string, in which case the stemmed string is returned,
 * or a list of strings, in which case a new list with the stemmed strings
 * (same order) is returned.
 *
 * Raises TypeError if data is neither a string nor a list, or if the list
 * contains something that is not a string.  Returns NULL with an exception
 * set on any failure; a partially built result list is released.
 */
static PyObject *Stemmer_stem(Stemmer *self, PyObject *args)
{
    PyObject *data;
    PyObject *res;
    int i;

    if (self == NULL) {
        PyErr_SetString(PyExc_TypeError, "can not call stem() on unbound method");
        return NULL;
    }

    if (!PyArg_ParseTuple(args, "O", &data))
        return NULL;

    if (PyString_Check(data))
        return stem_word(self, data);

    if (!PyList_Check(data)) {
        PyErr_SetString(PyExc_TypeError, "Unsupported datatype (must be string or list)");
        return NULL;
    }

    res = PyList_New(0);
    if (res == NULL)
        return NULL;

    for (i = 0; i < PyList_Size(data); i++) {
        PyObject *item = PyList_GetItem(data, i);   /* borrowed */
        PyObject *stemmed;
        int status;

        if (item == NULL)
            goto fail;

        if (!PyString_Check(item)) {
            PyErr_SetString(PyExc_TypeError, "Unsupported datatype found in list (only strings allowed)");
            goto fail;
        }

        stemmed = stem_word(self, item);
        if (stemmed == NULL)
            goto fail;

        /* The list keeps its own reference; release ours either way. */
        status = PyList_Append(res, stemmed);
        Py_DECREF(stemmed);
        if (status < 0)
            goto fail;
    }
    return res;

fail:
    Py_DECREF(res);
    return NULL;
}
/*
 * Method table of Stemmer objects; looked up by Stemmer_getattr().
 *
 * NOTE(review): getCacheSize is registered as METH_VARARGS although the C
 * function only declares the self parameter -- confirm this is intended.
 */
static struct PyMethodDef Stemmer_methods[] =
{
    {
        "language",
        (PyCFunction) Stemmer_language,
        METH_VARARGS,
        "language() -- Returns the language of the stemmer object"
    },
    {
        "getCacheSize",
        (PyCFunction) Stemmer_getCacheSize,
        METH_VARARGS,
        "getCacheSize() -- Returns the size of the stemmer cache"
    },
    {
        "setCacheSize",
        (PyCFunction) Stemmer_setCacheSize,
        METH_VARARGS,
        "setCacheSize(n) -- Set the size of the internal stemmer cache to n"
    },
    {
        "stem",
        (PyCFunction) Stemmer_stem,
        METH_VARARGS,
        "stem(word) -- Return stemmed word"
    },
    { NULL, NULL }      /* sentinel */
};
/*
 * Attribute lookup for Stemmer objects: resolves name against the
 * Stemmer_methods table and returns whatever Py_FindMethod() yields.
 */
static PyObject *
Stemmer_getattr(Stemmer *self, char *name)
{
    PyObject *owner = (PyObject *) self;

    return Py_FindMethod(Stemmer_methods, owner, name);
}
/* Documentation string attached to the Stemmer type. */
static char StemmerType__doc__[] = "Stemmer object";

/*
 * Type object for Stemmer instances.  Only deallocation and attribute
 * lookup are provided; every other slot is left empty.
 */
static PyTypeObject StemmerType = {
    PyObject_HEAD_INIT(NULL)
    0,                              /* ob_size */
    "Stemmer",                      /* tp_name */
    sizeof(Stemmer),                /* tp_basicsize */
    0,                              /* tp_itemsize */

    /* methods */
    (destructor) Stemmer_dealloc,   /* tp_dealloc */
    0,                              /* tp_print */
    (getattrfunc) Stemmer_getattr,  /* tp_getattr */
    0,                              /* tp_setattr */
    0,                              /* tp_compare */
    0,                              /* tp_repr */
    0,                              /* tp_as_number */
    0,                              /* tp_as_sequence */
    0,                              /* tp_as_mapping */
    0,                              /* tp_hash */
    0,                              /* tp_call */
    0,                              /* tp_str */

    /* Space for future expansion */
    0L,
    0L,
    0L,
    0L,

    StemmerType__doc__              /* Documentation string */
};
/*
 * Stemmer(language) -- module level factory for Stemmer objects.
 *
 * language is the name of one of the supported stemmers.  The new object
 * gets an empty cache dictionary, a cache size of 5000, a private copy of
 * the language name, and the environment and stem routine of the
 * requested language.
 *
 * Returns the new object, or NULL with an exception set:
 *   - whatever PyArg_ParseTuple raises for a bad argument list,
 *   - TypeError for an unsupported language name,
 *   - MemoryError if an allocation fails.
 */
static PyObject *
newStemmer(PyObject *modinfo, PyObject *args)
{
    Stemmer *self;
    PyObject *cache;
    char *language;
    char *language_copy;

    /* Parse first: failing after the object exists would leak it. */
    if (!PyArg_ParseTuple(args, "s", &language))
        return NULL;

    language_copy = malloc(strlen(language) + 1);
    if (language_copy == NULL)
        return PyErr_NoMemory();
    strcpy(language_copy, language);

    cache = PyDict_New();
    if (cache == NULL) {
        free(language_copy);
        return NULL;
    }

    self = PyObject_NEW(Stemmer, &StemmerType);
    if (self == NULL) {
        Py_DECREF(cache);
        free(language_copy);
        return NULL;
    }

    /* From here on the object owns the cache and the name copy. */
    self->cache = cache;
    self->cache_size = 5000;
    self->language = language_copy;
    self->env = NULL;
    self->stem_func = NULL;

    if (!strcmp(language, "porter")) {
        self->env = porter_create_env();
        self->stem_func = porter_stem;
    } else if (!strcmp(language, "german")) {
        self->env = german_create_env();
        self->stem_func = german_stem;
    } else if (!strcmp(language, "french")) {
        self->env = french_create_env();
        self->stem_func = french_stem;
    } else if (!strcmp(language, "dutch")) {
        self->env = dutch_create_env();
        self->stem_func = dutch_stem;
    } else if (!strcmp(language, "spanish")) {
        self->env = spanish_create_env();
        self->stem_func = spanish_stem;
    } else if (!strcmp(language, "english")) {
        self->env = english_create_env();
        self->stem_func = english_stem;
    } else if (!strcmp(language, "swedish")) {
        self->env = swedish_create_env();
        self->stem_func = swedish_stem;
    } else if (!strcmp(language, "italian")) {
        self->env = italian_create_env();
        self->stem_func = italian_stem;
    } else if (!strcmp(language, "portuguese")) {
        self->env = portuguese_create_env();
        self->stem_func = portuguese_stem;
    } else if (!strcmp(language, "danish")) {
        self->env = danish_create_env();
        self->stem_func = danish_stem;
    } else if (!strcmp(language, "russian")) {
        self->env = russian_create_env();
        self->stem_func = russian_stem;
    } else if (!strcmp(language, "norwegian")) {
        self->env = norwegian_create_env();
        self->stem_func = norwegian_stem;
    } else {
        /* The name comes from the caller and may be arbitrarily long, so
           the amount copied into the message is bounded. */
        PyErr_Format(PyExc_TypeError,
                     "PyStemmer: Unsupported language '%.200s'", language);
        goto fail;
    }

    if (self->env == NULL) {
        PyErr_NoMemory();
        goto fail;
    }

    return (PyObject *) self;

fail:
    /* No usable environment exists on this path.  The deallocator picks
       the close routine by language name, so blank the (private) copy to
       make sure it closes nothing, then let it release everything else. */
    self->language[0] = '\0';
    Py_DECREF(self);
    return NULL;
}
/* Functions exported at module level. */
static struct PyMethodDef Stemmer_module_methods[] =
{
    {
        "availableStemmers",
        (PyCFunction) Stemmer_availableStemmers,
        METH_VARARGS,
        "availableStemmers() -- Return a list of all available stemmers"
    },
    {
        "Stemmer",
        (PyCFunction) newStemmer,
        METH_VARARGS,
        "Stemmer(language) -- Return a new language specific stemmer"
    },
    { NULL, NULL }      /* sentinel */
};
/* Module docstring; passed to Py_InitModule4() in initStemmer(). */
static char Stemmer_module_documentation[] =
    "Stemmer module for eleven different languages.\n\n"
    "$Id: Stemmer.c,v 1.1.2.1 2002/02/13 16:26:26 andreasjung Exp $\n";
void
initStemmer(void)
{
PyObject *m, *d;
char *rev="$Revision: 1.1.2.1 $";
/* Create the module and add the functions */
m = Py_InitModule4("Stemmer", Stemmer_module_methods,
Stemmer_module_documentation,
(PyObject*)NULL,PYTHON_API_VERSION);
/* Add some symbolic constants to the module */
d = PyModule_GetDict(m);
PyDict_SetItemString(d, "__version__",
PyString_FromStringAndSize(rev+11,strlen(rev+11)-2));
if (PyErr_Occurred())
Py_FatalError("can't initialize module Stemmer");
}