[Zope] Zope 2.3.2 version
JL BERLIET
jl.berliet@wanadoo.fr
Fri, 18 May 2001 18:54:26 +0200
C'est un message de format MIME en plusieurs parties.
------=_NextPart_000_0007_01C0DFCB.F5F36CA0
Content-Type: text/plain;
charset="iso-8859-1"
Content-Transfer-Encoding: 7bit
> The search has to bring related
> results to the keyword, not only results that has it. A
> basic feature to do that is to ignore accentuation
Attached is a patched "Splitter.c" that I'm using with French words to ignore
accentuation (see the function "sans_accent" and add your Portuguese characters).
Jean-Louis BERLIET
------=_NextPart_000_0007_01C0DFCB.F5F36CA0
Content-Type: application/octet-stream;
name="Splitter.c"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="Splitter.c"
/************************************************************************=
*****=0A=
=0A=
Zope Public License (ZPL) Version 1.0=0A=
-------------------------------------=0A=
=0A=
Copyright (c) Digital Creations. All rights reserved.=0A=
=0A=
This license has been certified as Open Source(tm).=0A=
=0A=
Redistribution and use in source and binary forms, with or without=0A=
modification, are permitted provided that the following conditions are=0A=
met:=0A=
=0A=
1. Redistributions in source code must retain the above copyright=0A=
notice, this list of conditions, and the following disclaimer.=0A=
=0A=
2. Redistributions in binary form must reproduce the above copyright=0A=
notice, this list of conditions, and the following disclaimer in=0A=
the documentation and/or other materials provided with the=0A=
distribution.=0A=
=0A=
3. Digital Creations requests that attribution be given to Zope=0A=
in any manner possible. Zope includes a "Powered by Zope"=0A=
button that is installed by default. While it is not a license=0A=
violation to remove this button, it is requested that the=0A=
attribution remain. A significant investment has been put=0A=
into Zope, and this effort will continue if the Zope community=0A=
continues to grow. This is one way to assure that growth.=0A=
=0A=
4. All advertising materials and documentation mentioning=0A=
features derived from or use of this software must display=0A=
the following acknowledgement:=0A=
=0A=
"This product includes software developed by Digital Creations=0A=
for use in the Z Object Publishing Environment=0A=
(http://www.zope.org/)."=0A=
=0A=
In the event that the product being advertised includes an=0A=
intact Zope distribution (with copyright and license included)=0A=
then this clause is waived.=0A=
=0A=
5. Names associated with Zope or Digital Creations must not be used to=0A=
endorse or promote products derived from this software without=0A=
prior written permission from Digital Creations.=0A=
=0A=
6. Modified redistributions of any form whatsoever must retain=0A=
the following acknowledgment:=0A=
=0A=
"This product includes software developed by Digital Creations=0A=
for use in the Z Object Publishing Environment=0A=
(http://www.zope.org/)."=0A=
=0A=
Intact (re-)distributions of any official Zope release do not=0A=
require an external acknowledgement.=0A=
=0A=
7. Modifications are encouraged but must be packaged separately as=0A=
patches to official Zope releases. Distributions that do not=0A=
clearly separate the patches from the original work must be clearly=0A=
labeled as unofficial distributions. Modifications which do not=0A=
carry the name Zope may be packaged in any form, as long as they=0A=
conform to all of the clauses above.=0A=
=0A=
=0A=
Disclaimer=0A=
=0A=
THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY=0A=
EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE=0A=
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR=0A=
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS=0A=
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,=0A=
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT=0A=
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF=0A=
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND=0A=
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,=0A=
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT=0A=
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF=0A=
SUCH DAMAGE.=0A=
=0A=
=0A=
This software consists of contributions made by Digital Creations and=0A=
many individuals on behalf of Digital Creations. Specific=0A=
attributions are listed in the accompanying credits file.=0A=
=0A=
=
*************************************************************************=
***/=0A=
#include "Python.h"=0A=
#include <ctype.h>=0A=
=0A=
/* ASSIGN(V,E): evaluate E, release the old value of V (which may be=0A=
   NULL), then store the result of E in V. */=0A=
#define ASSIGN(V,E) {PyObject *__e; __e=3D(E); Py_XDECREF(V); (V)=3D__e;}=0A=
/* UNLESS(E): reads as "if not E". */=0A=
#define UNLESS(E) if(!(E))=0A=
/* UNLESS_ASSIGN(V,E) stmt: ASSIGN(V,E), then run stmt if V is NULL. */=0A=
#define UNLESS_ASSIGN(V,E) ASSIGN(V,E) UNLESS(V)=0A=
=0A=
/* Forward declaration; next_word is defined further down and takes=0A=
   (Splitter *self, char **startpos, char **endpos). */=0A=
static PyObject *next_word();=0A=
=0A=
/* State of one word splitter object. */=0A=
typedef struct =0A=
{=0A=
PyObject_HEAD=0A=
/* text: string form of the document (PyObject_Str result).=0A=
   synstop: optional synonym/stop-word mapping, NULL when absent. */=0A=
PyObject *text, *synstop;=0A=
/* here: current scan position inside text's character buffer.=0A=
   end: one past the last character of that buffer. */=0A=
char *here, *end;=0A=
/* Index of the last word produced by next_word; -1 after a reset. */=0A=
int index;=0A=
} Splitter;=0A=
=0A=
/* Rewind the scanner to the first byte of the text and forget how=0A=
   many words have been produced so far (index becomes -1). */=0A=
static void=0A=
Splitter_reset(Splitter *self)=0A=
{=0A=
    self->index =3D -1;=0A=
    self->here =3D PyString_AsString(self->text);=0A=
}=0A=
=0A=
static void=0A=
Splitter_dealloc(Splitter *self) =0A=
{=0A=
Py_XDECREF(self->text);=0A=
Py_XDECREF(self->synstop);=0A=
PyMem_DEL(self);=0A=
}=0A=
=0A=
static int=0A=
Splitter_length(Splitter *self)=0A=
{=0A=
PyObject *res=3D0;=0A=
=0A=
Splitter_reset(self);=0A=
while(1)=0A=
{=0A=
UNLESS_ASSIGN(res,next_word(self,NULL,NULL)) return -1;=0A=
UNLESS(PyString_Check(res))=0A=
{=0A=
Py_DECREF(res);=0A=
break;=0A=
}=0A=
}=0A=
return self->index+1;=0A=
}=0A=
=0A=
static PyObject *=0A=
Splitter_concat(Splitter *self, PyObject *other)=0A=
{=0A=
PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");=0A=
return NULL;=0A=
}=0A=
=0A=
static PyObject *=0A=
Splitter_repeat(Splitter *self, long n)=0A=
{=0A=
PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");=0A=
return NULL;=0A=
}=0A=
=0A=
/*=0A=
  Map an input word to an output word by applying the synonym/stop=0A=
  word mapping held in self->synstop (if there is one).=0A=
=0A=
  Input is a word; it must be a Python string.=0A=
=0A=
  Output is a new reference to one of:=0A=
=0A=
    None     -- The word is a stop word: it is shorter than two=0A=
                characters, contains no letter, or the mapping=0A=
                gives a non-string value for it=0A=
=0A=
    sometext -- The word itself, or the replacement found for it=0A=
=0A=
  Returns NULL with an exception set if word is not a string.=0A=
*/=0A=
static PyObject *=0A=
check_synstop(Splitter *self, PyObject *word)=0A=
{=0A=
    PyObject *value;=0A=
    char *cword;=0A=
    int len, hops =3D 0;=0A=
=0A=
    /* Callers may pass arbitrary objects; fail cleanly instead of=0A=
       returning None with a pending exception. */=0A=
    cword =3D PyString_AsString(word);=0A=
    if (cword =3D=3D NULL) return NULL;=0A=
=0A=
    len =3D PyString_Size(word);=0A=
    if (len < 2) /* Single-letter words are stop words! */=0A=
    {=0A=
        Py_INCREF(Py_None);=0A=
        return Py_None;=0A=
    }=0A=
=0A=
    /* A word without any letter is treated as a stop word too. */=0A=
    while (--len >=3D 0 && !isalpha((unsigned char)cword[len]))=0A=
        ;=0A=
    if (len < 0)=0A=
    {=0A=
        Py_INCREF(Py_None);=0A=
        return Py_None;=0A=
    }=0A=
=0A=
    /* From here on this function owns one reference to word. */=0A=
    Py_INCREF(word);=0A=
=0A=
    if (self->synstop =3D=3D NULL) return word;=0A=
=0A=
    /* Follow the chain word -> synonym -> synonym of synonym ... */=0A=
    while ((value =3D PyObject_GetItem(self->synstop, word)) &&=0A=
           PyString_Check(value))=0A=
    {=0A=
        ASSIGN(word, value); /* word now owns the looked-up reference */=0A=
        /* Avoid infinite recursion on cyclic synonym tables. */=0A=
        if (++hops > 100) return word;=0A=
    }=0A=
=0A=
    if (value =3D=3D NULL)=0A=
    {=0A=
        /* Lookup failed: keep the word found so far. */=0A=
        PyErr_Clear();=0A=
        return word;=0A=
    }=0A=
=0A=
    /* Non-string mapping value (which must be None): stop word.=0A=
       Release our reference to word, it is not returned. */=0A=
    Py_DECREF(word);=0A=
    return value;=0A=
}=0A=
=0A=
/* Strip the accent from a single character.=0A=
   Each listed accented letter, upper or lower case, is mapped to the=0A=
   plain lower-case ASCII letter (a, e, c, o, i or u); every other=0A=
   character is returned unchanged.=0A=
   NOTE(review): the case labels are raw 8-bit character literals;=0A=
   their byte values match ISO-8859-1 accented letters -- confirm the=0A=
   encoding of the indexed text. Extend the switch to cover letters=0A=
   of other languages. */=0A=
static char =0A=
sans_accent(char c) =0A=
{ =0A=
switch (c) { =0A=
case '=E0': return 'a'; =0A=
case '=C0': return 'a'; =0A=
case '=E2': return 'a'; =0A=
case '=C2': return 'a'; =0A=
case '=E4': return 'a'; =0A=
case '=C4': return 'a'; =0A=
case '=E9': return 'e'; =0A=
case '=C9': return 'e';=0A=
case '=E8': return 'e';=0A=
case '=C8': return 'e';=0A=
case '=EA': return 'e';=0A=
case '=CA': return 'e';=0A=
case '=EB': return 'e';=0A=
case '=CB': return 'e';=0A=
case '=E7': return 'c';=0A=
case '=C7': return 'c';=0A=
case '=F4': return 'o';=0A=
case '=D4': return 'o';=0A=
case '=F6': return 'o';=0A=
case '=D6': return 'o';=0A=
case '=EE': return 'i';=0A=
case '=CE': return 'i';=0A=
case '=EF': return 'i';=0A=
case '=CF': return 'i';=0A=
case '=FB': return 'u';=0A=
case '=DB': return 'u';=0A=
case '=F9': return 'u';=0A=
case '=D9': return 'u';=0A=
case '=FC': return 'u';=0A=
case '=DC': return 'u';=0A=
/* Anything else passes through untouched. */=0A=
default: return c;=0A=
}=0A=
}=0A=
=0A=
=0A=
#define MAX_WORD 64 /* Words longer than MAX_WORD are stemmed */=0A=
=0A=
static PyObject *=0A=
next_word(Splitter *self, char **startpos, char **endpos)=0A=
{=0A=
char wbuf[MAX_WORD];=0A=
char *end, *here, *b;=0A=
int i =3D 0, c;=0A=
PyObject *pyword, *res;=0A=
=0A=
here=3Dself->here;=0A=
end=3Dself->end;=0A=
b=3Dwbuf;=0A=
while (here < end)=0A=
{=0A=
/* skip hyphens */ =0A=
if ((i > 0) && (*here =3D=3D '-'))=0A=
{=0A=
here++;=0A=
while (isspace((unsigned char) *here) && (here < end)) here++;=0A=
continue;=0A=
}=0A=
=0A=
c=3Dtolower((unsigned char) sans_accent(*here));=0A=
=0A=
/* Check to see if this character is part of a word */=0A=
if(isalnum((unsigned char)c) || c=3D=3D'/' || c=3D=3D'_')=0A=
{ /* Found a word character */=0A=
if(startpos && i=3D=3D0) *startpos=3Dhere;=0A=
if(i++ < MAX_WORD) *b++ =3D c;=0A=
}=0A=
else if (i !=3D 0)=0A=
{ /* We've found the end of a word */=0A=
if(i >=3D MAX_WORD) i=3DMAX_WORD; /* "stem" the long word */=0A=
=0A=
UNLESS(pyword =3D PyString_FromStringAndSize(wbuf, i))=0A=
{=0A=
self->here=3Dhere;=0A=
return NULL;=0A=
}=0A=
=0A=
UNLESS(res =3D check_synstop(self, pyword))=0A=
{=0A=
self->here=3Dhere;=0A=
Py_DECREF(pyword);=0A=
return NULL;=0A=
}=0A=
=0A=
if (res !=3D Py_None)=0A=
{=0A=
if(endpos) *endpos=3Dhere;=0A=
self->here=3Dhere;=0A=
Py_DECREF(pyword);=0A=
self->index++;=0A=
return res;=0A=
}=0A=
=0A=
/* The word is a stopword, so ignore it */ =0A=
=0A=
Py_DECREF(res); =0A=
Py_DECREF(pyword);=0A=
i =3D 0;=0A=
b=3Dwbuf;=0A=
}=0A=
=0A=
here++;=0A=
}=0A=
=0A=
self->here=3Dhere;=0A=
=0A=
/* We've reached the end of the string */=0A=
=0A=
if(i >=3D MAX_WORD) i=3DMAX_WORD; /* "stem" the long word */=0A=
if (i =3D=3D 0)=0A=
{ =0A=
/* No words */=0A=
self->here=3Dhere;=0A=
Py_INCREF(Py_None);=0A=
return Py_None;=0A=
}=0A=
=0A=
UNLESS(pyword =3D PyString_FromStringAndSize(wbuf, i)) return NULL;=0A=
=0A=
if(endpos) *endpos=3Dhere;=0A=
res =3D check_synstop(self, pyword);=0A=
Py_DECREF(pyword);=0A=
if(PyString_Check(res)) self->index++;=0A=
return res;=0A=
}=0A=
=0A=
/*=0A=
  sq_item slot: return a new reference to the i-th word.=0A=
=0A=
  Words are produced sequentially, so asking for an index at or=0A=
  before the current one rescans the text from the beginning.=0A=
  Raises IndexError (returns NULL) when i is negative or past the=0A=
  last word; returns NULL if next_word reports an error.=0A=
*/=0A=
static PyObject *=0A=
Splitter_item(Splitter *self, int i)=0A=
{=0A=
    PyObject *word =3D NULL;=0A=
=0A=
    /* Without this guard a negative index would skip the loop and=0A=
       return NULL with no exception set. */=0A=
    if (i < 0)=0A=
    {=0A=
        PyErr_SetString(PyExc_IndexError,=0A=
                        "Splitter index out of range");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    if (i <=3D self->index) Splitter_reset(self);=0A=
=0A=
    /* next_word advances self->index for every word it returns. */=0A=
    while (self->index < i)=0A=
    {=0A=
        Py_XDECREF(word);=0A=
=0A=
        UNLESS(word =3D next_word(self, NULL, NULL)) return NULL;=0A=
        if (word =3D=3D Py_None)=0A=
        {=0A=
            Py_DECREF(word);=0A=
            PyErr_SetString(PyExc_IndexError,=0A=
                            "Splitter index out of range");=0A=
            return NULL;=0A=
        }=0A=
    }=0A=
=0A=
    return word;=0A=
}=0A=
=0A=
static PyObject *=0A=
Splitter_slice(Splitter *self, int i, int j)=0A=
{=0A=
PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");=0A=
return NULL;=0A=
}=0A=
=0A=
/* Sequence protocol table for Splitter objects. Length and item=0A=
   access are implemented; concat, repeat and slice are handlers=0A=
   that raise TypeError; item and slice assignment are absent. */=0A=
static PySequenceMethods Splitter_as_sequence =3D {=0A=
(inquiry)Splitter_length, /*sq_length*/=0A=
(binaryfunc)Splitter_concat, /*sq_concat*/=0A=
(intargfunc)Splitter_repeat, /*sq_repeat*/=0A=
(intargfunc)Splitter_item, /*sq_item*/=0A=
(intintargfunc)Splitter_slice, /*sq_slice*/=0A=
(intobjargproc)0, /*sq_ass_item*/=0A=
(intintobjargproc)0, /*sq_ass_slice*/=0A=
};=0A=
=0A=
/*=0A=
  pos(index) -- return a tuple (start, end) of offsets into the text=0A=
  for the word at the given index.=0A=
=0A=
  The offsets are those reported by next_word through its startpos=0A=
  and endpos arguments. Raises IndexError when index is negative or=0A=
  past the last word.=0A=
*/=0A=
static PyObject *=0A=
Splitter_pos(Splitter *self, PyObject *args)=0A=
{=0A=
    char *start =3D NULL, *end =3D NULL, *ctext;=0A=
    PyObject *res;=0A=
    int i;=0A=
=0A=
    UNLESS(PyArg_Parse(args, "i", &i)) return NULL;=0A=
=0A=
    /* A negative index would skip the loop below and leave start=0A=
       and end unset. */=0A=
    if (i < 0)=0A=
    {=0A=
        PyErr_SetString(PyExc_IndexError, "Splitter index out of range");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    if (i <=3D self->index) Splitter_reset(self);=0A=
=0A=
    while (self->index < i)=0A=
    {=0A=
        UNLESS(res =3D next_word(self, &start, &end)) return NULL;=0A=
        if (!PyString_Check(res))=0A=
        {=0A=
            Py_DECREF(res);=0A=
            PyErr_SetString(PyExc_IndexError,=0A=
                            "Splitter index out of range");=0A=
            return NULL;=0A=
        }=0A=
        /* next_word has already advanced self->index for this word;=0A=
           incrementing it again here would skip every other word. */=0A=
        Py_DECREF(res);=0A=
    }=0A=
=0A=
    ctext =3D PyString_AsString(self->text);=0A=
    /* The "i" format expects int arguments, not pointer differences. */=0A=
    return Py_BuildValue("(ii)", (int)(start - ctext),=0A=
                         (int)(end - ctext));=0A=
}=0A=
=0A=
/*=0A=
  indexes(word) -- return a list of the indexes at which word occurs=0A=
  in the sequence of words.=0A=
=0A=
  The argument is first normalised with check_synstop, so synonyms=0A=
  are compared in their replaced form and a stop word yields an=0A=
  empty list. The text is rescanned from the beginning. Raises=0A=
  TypeError when the argument is not a string.=0A=
*/=0A=
static PyObject *=0A=
Splitter_indexes(Splitter *self, PyObject *args)=0A=
{=0A=
    PyObject *word, *r =3D NULL, *w =3D NULL, *index =3D NULL;=0A=
    int i =3D 0;=0A=
=0A=
    UNLESS(PyArg_ParseTuple(args, "O", &word)) return NULL;=0A=
    UNLESS(PyString_Check(word))=0A=
    {=0A=
        PyErr_SetString(PyExc_TypeError,=0A=
                        "indexes() argument must be a string");=0A=
        return NULL;=0A=
    }=0A=
=0A=
    /* check_synstop returns a new reference; it is released on every=0A=
       exit path below. */=0A=
    UNLESS(word =3D check_synstop(self, word)) return NULL;=0A=
    UNLESS(r =3D PyList_New(0)) goto err;=0A=
=0A=
    Splitter_reset(self);=0A=
    while (1)=0A=
    {=0A=
        UNLESS_ASSIGN(w, next_word(self, NULL, NULL)) goto err;=0A=
        UNLESS(PyString_Check(w)) break; /* end of the text */=0A=
        if (PyObject_Compare(word, w) =3D=3D 0)=0A=
        {=0A=
            UNLESS_ASSIGN(index, PyInt_FromLong(i)) goto err;=0A=
            if (PyList_Append(r, index) < 0) goto err;=0A=
        }=0A=
        i++;=0A=
    }=0A=
    Py_XDECREF(w);=0A=
    Py_XDECREF(index);=0A=
    Py_DECREF(word);=0A=
    return r;=0A=
=0A=
err:=0A=
    Py_XDECREF(r);=0A=
    Py_XDECREF(w);=0A=
    Py_XDECREF(index);=0A=
    Py_DECREF(word);=0A=
    return NULL;=0A=
}=0A=
=0A=
/* Methods of Splitter objects, looked up by Splitter_getattr.=0A=
   "pos" is registered with flags 0 and parses its argument with=0A=
   PyArg_Parse; "indexes" uses METH_VARARGS and PyArg_ParseTuple. */=0A=
static struct PyMethodDef Splitter_methods[] =3D {=0A=
{ "pos", (PyCFunction)Splitter_pos, 0,=0A=
"pos(index) -- Return the starting and ending position of a token" =
},=0A=
{ "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,=0A=
"indexes(word) -- Return al list of the indexes of word in the =
sequence",=0A=
},=0A=
{ NULL, NULL } /* sentinel */=0A=
};=0A=
=0A=
static PyObject *=0A=
Splitter_getattr(Splitter *self, char *name) =0A=
{=0A=
return Py_FindMethod(Splitter_methods, (PyObject *)self, name);=0A=
}=0A=
=0A=
/* Documentation string of the Splitter type (empty). */=0A=
static char SplitterType__doc__[] =3D "";=0A=
=0A=
/* Type object for Splitter instances. Only deallocation, attribute=0A=
   lookup and the sequence protocol are provided; every other slot=0A=
   listed here is left empty. */=0A=
static PyTypeObject SplitterType =3D {=0A=
PyObject_HEAD_INIT(NULL)=0A=
0, /*ob_size*/=0A=
"Splitter", /*tp_name*/=0A=
sizeof(Splitter), /*tp_basicsize*/=0A=
0, /*tp_itemsize*/=0A=
/* methods */=0A=
(destructor)Splitter_dealloc, /*tp_dealloc*/=0A=
(printfunc)0, /*tp_print*/=0A=
(getattrfunc)Splitter_getattr, /*tp_getattr*/=0A=
(setattrfunc)0, /*tp_setattr*/=0A=
(cmpfunc)0, /*tp_compare*/=0A=
(reprfunc)0, /*tp_repr*/=0A=
0, /*tp_as_number*/=0A=
&Splitter_as_sequence, /*tp_as_sequence*/=0A=
0, /*tp_as_mapping*/=0A=
(hashfunc)0, /*tp_hash*/=0A=
(ternaryfunc)0, /*tp_call*/=0A=
(reprfunc)0, /*tp_str*/=0A=
=0A=
/* Space for future expansion */=0A=
0L,0L,0L,0L,=0A=
SplitterType__doc__ /* Documentation string */=0A=
};=0A=
=0A=
/*=0A=
  Splitter(doc[,synstop]) -- build a new Splitter object.=0A=
=0A=
  doc is converted with PyObject_Str and that string is the text to=0A=
  split; synstop, when given, is kept as the synonym/stop-word=0A=
  mapping. The new object starts before the first word (index -1).=0A=
  Returns NULL with an exception set on failure.=0A=
*/=0A=
static PyObject *=0A=
get_Splitter(PyObject *modinfo, PyObject *args)=0A=
{=0A=
    Splitter *self;=0A=
    PyObject *doc, *synstop =3D NULL;=0A=
=0A=
    if (!PyArg_ParseTuple(args, "O|O", &doc, &synstop)) return NULL;=0A=
=0A=
    self =3D PyObject_NEW(Splitter, &SplitterType);=0A=
    if (self =3D=3D NULL) return NULL;=0A=
=0A=
    /* Keep our own reference to the mapping, if one was supplied. */=0A=
    Py_XINCREF(synstop);=0A=
    self->synstop =3D synstop;=0A=
=0A=
    self->text =3D PyObject_Str(doc);=0A=
    if (self->text !=3D NULL)=0A=
    {=0A=
        self->here =3D PyString_AsString(self->text);=0A=
        if (self->here !=3D NULL)=0A=
        {=0A=
            self->end =3D self->here + PyString_Size(self->text);=0A=
            self->index =3D -1;=0A=
            return (PyObject*)self;=0A=
        }=0A=
    }=0A=
=0A=
    /* Construction failed: release the half-built object. */=0A=
    Py_DECREF(self);=0A=
    return NULL;=0A=
}=0A=
=0A=
/* Functions exported by the module: only the Splitter factory. */=0A=
static struct PyMethodDef Splitter_module_methods[] =3D {=0A=
{ "Splitter", (PyCFunction)get_Splitter, METH_VARARGS,=0A=
"Splitter(doc[,synstop]) -- Return a word splitter" },=0A=
{ NULL, NULL }=0A=
};=0A=
=0A=
/* Module docstring passed to Py_InitModule4 in initSplitter. */=0A=
static char Splitter_module_documentation[] =3D =0A=
"Parse source strings into sequences of words\n"=0A=
"\n"=0A=
"for use in an inverted index\n"=0A=
"\n"=0A=
"$Id: Splitter.c,v 1.14.28.2 2001/03/21 16:37:53 jim Exp $\n"=0A=
;=0A=
=0A=
=0A=
/*=0A=
  Module initialisation: create the "Splitter" module, register its=0A=
  functions and publish the revision number as __version__.=0A=
  Calls Py_FatalError if any step leaves an exception pending.=0A=
*/=0A=
void=0A=
initSplitter()=0A=
{=0A=
    PyObject *m, *d, *version;=0A=
    char *rev =3D "$Revision: 1.14.28.2 $";=0A=
=0A=
    /* Create the module and add the functions */=0A=
    m =3D Py_InitModule4("Splitter", Splitter_module_methods,=0A=
                       Splitter_module_documentation,=0A=
                       (PyObject*)NULL, PYTHON_API_VERSION);=0A=
=0A=
    if (m !=3D NULL)=0A=
    {=0A=
        /* Add some symbolic constants to the module */=0A=
        d =3D PyModule_GetDict(m);=0A=
        /* Skip the "$Revision: " prefix and drop the trailing " $". */=0A=
        version =3D PyString_FromStringAndSize(rev+11,=0A=
                                             strlen(rev+11)-2);=0A=
        if (version !=3D NULL)=0A=
        {=0A=
            /* The dictionary takes its own reference, so ours must=0A=
               be released afterwards. */=0A=
            PyDict_SetItemString(d, "__version__", version);=0A=
            Py_DECREF(version);=0A=
        }=0A=
    }=0A=
=0A=
    if (PyErr_Occurred())=0A=
        Py_FatalError("can't initialize module Splitter");=0A=
}=0A=
------=_NextPart_000_0007_01C0DFCB.F5F36CA0--