[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PySimilarity/src - Similarity.c:1.1.2.1 Proximity.c:NONE

Andreas Jung andreas@digicool.com
Sat, 16 Feb 2002 19:31:28 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PySimilarity/src
In directory cvs.zope.org:/tmp/cvs-serv3647/src

Added Files:
      Tag: ajung-textindexng-branch
	Similarity.c 
Removed Files:
      Tag: ajung-textindexng-branch
	Proximity.c 
Log Message:
renamed "Proximity" to "Similarity"


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PySimilarity/src/Similarity.c ===
#include "Python.h"

/*
 * Encoders implemented outside this file; this module only wraps them
 * for Python.
 *
 * Ownership, as established by the callers in this file: the buffer
 * handed back through `phoned_word` by metaphone() and the pointer
 * returned by soundex() are released with free() by the caller.
 * NOTE(review): the meaning of metaphone()'s int return value and of the
 * `traditional` flag is not visible here -- confirm against the
 * implementation before relying on them.
 */
extern int metaphone(char *word, int max_phonemes, char **phoned_word, int traditional);
extern char * soundex(char *word);
extern int levenshtein(char *word1, char *word2);

/*
 * availableAlgorithms() -- return a new list with the names of the
 * algorithms exposed by this module ("metaphone", "soundex",
 * "levenshtein").
 *
 * `args` is not inspected.  Returns a new reference, or NULL with a
 * Python exception set if an allocation fails.
 */
static PyObject *
PyAvailableAlgorithms(PyObject *modinfo, PyObject *args)
{
    static const char *names[] = { "metaphone", "soundex", "levenshtein" };
    PyObject *list;
    size_t i;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    for (i = 0; i < sizeof names / sizeof names[0]; i++) {
        PyObject *name;
        int status;

        name = PyString_FromString(names[i]);
        if (name == NULL) {
            Py_DECREF(list);
            return NULL;
        }

        /* PyList_Append() takes its own reference to the item, so the
           one we hold must be dropped regardless of the outcome. */
        status = PyList_Append(list, name);
        Py_DECREF(name);

        if (status < 0) {
            Py_DECREF(list);
            return NULL;
        }
    }

    return list;
}


/*
 * Encode a single Python string object with metaphone().
 *
 * Returns a new string object, or NULL with a Python exception set.
 * The buffer produced by metaphone() is released here with free().
 */
static PyObject *
metaphone_encode_string(PyObject *str, int max_phonemes)
{
    PyObject *encoded;
    char *word;
    char *meta = NULL;

    word = PyString_AsString(str);
    if (word == NULL)
        return NULL;

    metaphone(word, max_phonemes, &meta, 0);

    /* Guard against metaphone() leaving the output pointer untouched;
       PyString_FromString(NULL) would crash. */
    if (meta == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "metaphone encoding failed");
        return NULL;
    }

    encoded = PyString_FromString(meta);
    free(meta);

    return encoded;
}

/*
 * metaphone(data) -- metaphone-encode a string or a sequence of strings.
 *
 * A string argument yields the encoded string; any other sequence
 * yields a new list with one encoded string per item.  Raises TypeError
 * if the argument is neither, or if a sequence item is not a string.
 * The phoneme limit passed to metaphone() is fixed at 6.
 */
static PyObject *
PyMetaphone(PyObject *modinfo, PyObject *args)
{
    PyObject *data;
    int meta_len = 6;

    if (! (PyArg_ParseTuple(args,"O",&data)))
        return NULL;

    if (PyString_Check(data))
        return metaphone_encode_string(data, meta_len);

    if (PySequence_Check(data)) {
        PyObject *list;
        int i, size;

        size = PySequence_Size(data);
        if (size < 0)
            return NULL;

        list = PyList_New(0);
        if (list == NULL)
            return NULL;

        for (i = 0; i < size; i++) {
            PyObject *item, *encoded;
            int status;

            /* PySequence_GetItem() returns a new reference that must be
               released on every path below. */
            item = PySequence_GetItem(data, i);
            if (item == NULL) {
                Py_DECREF(list);
                return NULL;
            }

            if (!PyString_Check(item)) {
                PyErr_SetString(PyExc_TypeError, "Unsupported datatype found in list (only strings allowed)");
                Py_DECREF(item);
                Py_DECREF(list);
                return NULL;
            }

            encoded = metaphone_encode_string(item, meta_len);
            Py_DECREF(item);
            if (encoded == NULL) {
                Py_DECREF(list);
                return NULL;
            }

            status = PyList_Append(list, encoded);
            Py_DECREF(encoded);
            if (status < 0) {
                Py_DECREF(list);
                return NULL;
            }
        }

        return list;
    }

    PyErr_SetString(PyExc_TypeError, "Unsupported datatype (must be string or sequence of strings)");

    return NULL;
}



/*
 * Encode a single Python string object with soundex().
 *
 * Returns a new string object, or NULL with a Python exception set.
 * The buffer returned by soundex() is released here with free().
 */
static PyObject *
soundex_encode_string(PyObject *str)
{
    PyObject *encoded;
    char *word, *res;

    word = PyString_AsString(str);
    if (word == NULL)
        return NULL;

    res = soundex(word);

    /* PyString_FromString(NULL) would crash, so treat a NULL result as
       a failure of the encoder. */
    if (res == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "soundex encoding failed");
        return NULL;
    }

    encoded = PyString_FromString(res);
    free(res);

    return encoded;
}

/*
 * soundex(data) -- soundex-encode a string or a sequence of strings.
 *
 * A string argument yields the encoded string; any other sequence
 * yields a new list with one encoded string per item.  Raises TypeError
 * if the argument is neither, or if a sequence item is not a string.
 */
static PyObject *
PySoundex(PyObject *modinfo, PyObject *args)
{
    PyObject *data;

    if (! (PyArg_ParseTuple(args,"O",&data)))
        return NULL;

    if (PyString_Check(data))
        return soundex_encode_string(data);

    if (PySequence_Check(data)) {
        PyObject *list;
        int i, size;

        size = PySequence_Size(data);
        if (size < 0)
            return NULL;

        list = PyList_New(0);
        if (list == NULL)
            return NULL;

        for (i = 0; i < size; i++) {
            PyObject *item, *encoded;
            int status;

            /* PySequence_GetItem() returns a new reference that must be
               released on every path below. */
            item = PySequence_GetItem(data, i);
            if (item == NULL) {
                Py_DECREF(list);
                return NULL;
            }

            if (!PyString_Check(item)) {
                PyErr_SetString(PyExc_TypeError, "Unsupported datatype found in list (only strings allowed)");
                Py_DECREF(item);
                Py_DECREF(list);
                return NULL;
            }

            encoded = soundex_encode_string(item);
            Py_DECREF(item);
            if (encoded == NULL) {
                Py_DECREF(list);
                return NULL;
            }

            status = PyList_Append(list, encoded);
            Py_DECREF(encoded);
            if (status < 0) {
                Py_DECREF(list);
                return NULL;
            }
        }

        return list;
    }

    PyErr_SetString(PyExc_TypeError, "Unsupported datatype (must be string or sequence of strings)");

    return NULL;
}


/*
 * levenshtein(word1, word2) -- return the value computed by
 * levenshtein() for the two strings as a Python int.
 *
 * Both arguments are parsed with the "s" format, so they must be
 * strings without embedded NUL bytes.  Returns NULL with an exception
 * set on argument errors.
 */
static  PyObject *
PyLevenshtein(PyObject *modinfo, PyObject *args)
{
    char *word1, *word2;
    int distance;

    if (! (PyArg_ParseTuple(args,"ss",&word1,&word2)))
        return NULL;

    distance = levenshtein(word1,word2);

    /* The result is only returned to the caller; an extension module
       must not write debugging output to the host process's stdout. */
    return PyInt_FromLong( (long) distance);
}

/*
 * Method table for the module: maps each Python-visible function name
 * to its C implementation.  Every entry uses METH_VARARGS; the last
 * string of each entry is the function's Python docstring.  The
 * all-NULL entry terminates the table.
 */
static struct PyMethodDef Similarity_module_methods[] =
    {
        { "availableAlgorithms", (PyCFunction)PyAvailableAlgorithms,
            METH_VARARGS,
            "availableAlgorithms() "
            "-- return list of available string Similarity algorithms"
        },
        /* The wrapper accepts exactly one argument (format "O"); the
           encoding length is fixed inside PyMetaphone. */
        { "metaphone", (PyCFunction)PyMetaphone,
          METH_VARARGS,
          "metaphone(word) "
          "-- return metaphone encoding for word "
          "(a string or a sequence of strings)"
        },
        { "soundex", (PyCFunction)PySoundex,
          METH_VARARGS,
          "soundex(word) "
          "-- return soundex encoding for word "
          "(a string or a sequence of strings)"
        },
        { "levenshtein", (PyCFunction)PyLevenshtein,
          METH_VARARGS,
          "levenshtein(word1,word2) "
          "-- return computed distance between word1 and word2"
        },
        { NULL, NULL, 0, NULL }
    };

/*
 * Docstring installed on the module object by initSimilarity().
 * The second line is a CVS keyword that is rewritten on checkout.
 */
static char Similarity_module_documentation[] =
    "Module for string Similarity algorithms\n\n"
    "$Id: Similarity.c,v 1.1.2.1 2002/02/17 00:31:26 andreasjung Exp $\n";


/*
 * Module initialisation entry point, looked up by name when Python
 * imports "Similarity".
 *
 * Registers the method table and docstring, then stores the revision
 * number extracted from the CVS keyword below as the module attribute
 * __version__.  Any failure is reported through Py_FatalError().
 */
void
initSimilarity(void)
{
    PyObject *m, *d, *version;
    char *rev="$Revision: 1.1.2.1 $";
    size_t rev_len = strlen(rev);

    /* Create the module and add the functions */
    m = Py_InitModule4("Similarity", Similarity_module_methods,
                       Similarity_module_documentation,
                       (PyObject*)NULL,PYTHON_API_VERSION);

    if (m != NULL) {
        /* Add some symbolic constants to the module */
        d = PyModule_GetDict(m);

        /* Strip the 11-character "$Revision: " prefix and the
           2-character " $" suffix.  If the keyword is not expanded the
           string is too short for that, so fall back to an empty
           version instead of reading past the end of the literal. */
        if (rev_len > 13)
            version = PyString_FromStringAndSize(rev+11, (int)(rev_len-13));
        else
            version = PyString_FromString("");

        if (d != NULL && version != NULL)
            PyDict_SetItemString(d, "__version__", version);

        /* PyDict_SetItemString() does not steal the reference. */
        Py_XDECREF(version);
    }

    if (PyErr_Occurred())
        Py_FatalError("can't initialize module Similarity");
}

=== Removed File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PySimilarity/src/Proximity.c ===