[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch - dutchstem.c:1.1.2.1 output.txt:1.1.2.1 stem.h:1.1.2.1 stem.sbl:1.1.2.1 stemmer.html:1.1.2.1 voc.txt:1.1.2.1

Andreas Jung andreas@digicool.com
Wed, 13 Feb 2002 11:26:19 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch
In directory cvs.zope.org:/tmp/cvs-serv30556/PyStemmer/dutch

Added Files:
      Tag: ajung-textindexng-branch
	dutchstem.c output.txt stem.h stem.sbl stemmer.html voc.txt 
Log Message:
added PyStemmer


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/dutchstem.c ===

#include "header.h"

extern int dutch_stem(struct SN_env * z);
static int r_standard_suffix(struct SN_env * z);
static int r_undouble(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_R1(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
static int r_en_ending(struct SN_env * z);
static int r_e_ending(struct SN_env * z);
static int r_postlude(struct SN_env * z);
static int r_prelude(struct SN_env * z);

/*
 * Among table for r_prelude: the accented vowels declared with `stringdef`
 * in stem.sbl, plus the empty string.
 *
 * Each entry starts with the string length and the string bytes; the fourth
 * field is the value find_among() hands back, which r_prelude switches on:
 *   1 -> "a", 2 -> "e", 3 -> "i", 4 -> "o", 5 -> "u",
 *   6 -> (empty string matched) step over one character.
 * NOTE(review): the third field looks like the index of the entry that is a
 * proper substring of this one (-1 for none) and the fifth an unused
 * callback slot -- confirm against struct among in header.h.
 */
static struct among a_0[11] =
{
/*  0 */ { 0, (byte *)"", -1, 6, 0},
/*  1 */ { 1, (byte *)"\x81" "", 0, 5, 0},
/*  2 */ { 1, (byte *)"\x82" "", 0, 2, 0},
/*  3 */ { 1, (byte *)"\x84" "", 0, 1, 0},
/*  4 */ { 1, (byte *)"\x89" "", 0, 2, 0},
/*  5 */ { 1, (byte *)"\x8B" "", 0, 3, 0},
/*  6 */ { 1, (byte *)"\x94" "", 0, 4, 0},
/*  7 */ { 1, (byte *)"\xA0" "", 0, 1, 0},
/*  8 */ { 1, (byte *)"\xA1" "", 0, 3, 0},
/*  9 */ { 1, (byte *)"\xA2" "", 0, 4, 0},
/* 10 */ { 1, (byte *)"\xA3" "", 0, 5, 0}
};

/*
 * Among table for r_postlude: the upper-case markers written by r_prelude.
 * Result values as used by the switch in r_postlude:
 *   1 ('Y') -> "y", 2 ('I') -> "i", 3 (empty string) -> step over one character.
 */
static struct among a_1[3] =
{
/*  0 */ { 0, (byte *)"", -1, 3, 0},
/*  1 */ { 1, (byte *)"I", 0, 2, 0},
/*  2 */ { 1, (byte *)"Y", 0, 1, 0}
};

/*
 * Among table for r_undouble: the doubled consonants whose last letter is
 * dropped. r_undouble only tests whether find_among_b() returned non-zero,
 * so the result field (-1) is not used to pick an action.
 */
static struct among a_2[3] =
{
/*  0 */ { 2, (byte *)"dd", -1, -1, 0},
/*  1 */ { 2, (byte *)"kk", -1, -1, 0},
/*  2 */ { 2, (byte *)"tt", -1, -1, 0}
};

/*
 * Among table for step 1 of r_standard_suffix (searched backwards).
 * Result values as used by the switch there:
 *   1 ("heden")     -> replace with "heid" if in R1,
 *   2 ("en", "ene") -> r_en_ending,
 *   3 ("s", "se")   -> delete if in R1 and preceded by a valid s-ending.
 */
static struct among a_3[5] =
{
/*  0 */ { 3, (byte *)"ene", -1, 2, 0},
/*  1 */ { 2, (byte *)"se", -1, 3, 0},
/*  2 */ { 2, (byte *)"en", -1, 2, 0},
/*  3 */ { 5, (byte *)"heden", 2, 1, 0},
/*  4 */ { 1, (byte *)"s", -1, 3, 0}
};

/*
 * Among table for step 3b of r_standard_suffix (searched backwards).
 * Result values as used by the switch there:
 *   1 ("end", "ing"), 2 ("ig"), 3 ("lijk"), 4 ("baar"), 5 ("bar").
 */
static struct among a_4[6] =
{
/*  0 */ { 3, (byte *)"end", -1, 1, 0},
/*  1 */ { 2, (byte *)"ig", -1, 2, 0},
/*  2 */ { 3, (byte *)"ing", -1, 1, 0},
/*  3 */ { 4, (byte *)"lijk", -1, 3, 0},
/*  4 */ { 4, (byte *)"baar", -1, 4, 0},
/*  5 */ { 3, (byte *)"bar", -1, 5, 0}
};

/*
 * Among table for step 4 of r_standard_suffix: the double vowels that are
 * reduced to a single vowel. Only the match/no-match outcome is tested.
 */
static struct among a_5[4] =
{
/*  0 */ { 2, (byte *)"aa", -1, -1, 0},
/*  1 */ { 2, (byte *)"ee", -1, -1, 0},
/*  2 */ { 2, (byte *)"oo", -1, -1, 0},
/*  3 */ { 2, (byte *)"uu", -1, -1, 0}
};


/*
 * Character-class bitmaps for the groupings declared in stem.sbl. They are
 * passed to in_grouping()/out_grouping() (and the _b variants) together with
 * the lowest and highest character code of the class.
 * NOTE(review): the values are consistent with "bit k of byte n stands for
 * code min + 8*n + k" -- confirm against the runtime's grouping helpers.
 */

/* v: a e i o u y and hex 8A (e-grave); used with the range 97..138. */
static byte g_v[] = { 17, 65, 16, 1, 0, 2 };

/* v_I: v plus 'I'; used with the range 73..138. */
static byte g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 2 };

/* v_j: v plus 'j'; used with the range 97..138. */
static byte g_v_j[] = { 17, 67, 16, 1, 0, 2 };

/*
 * prelude (forward mode): normalises the word before the regions are marked.
 *
 *  1. Walks over the whole word; every accented vowel listed in a_0 is
 *     replaced by its plain letter, any other character is stepped over.
 *     The walk is a Snowball "test", so the cursor is put back afterwards.
 *  2. A 'y' at the cursor (the start of the word) becomes 'Y'.
 *  3. Repeatedly searches for a vowel followed by 'i' + vowel (the 'i'
 *     becomes 'I') or followed by 'y' (which becomes 'Y').
 *
 * r_postlude turns the 'I'/'Y' markers back into lower case.
 * Always returns 1.
 */
static int r_prelude(struct SN_env * z) {
    int among_var;
    {   int c_test = z->c; /* test, line 42 */
        while(1) { /* repeat, line 42 */
            int c = z->c;
            z->bra = z->c; /* [, line 43 */
            among_var = find_among(z, a_0, 11); /* substring, line 43 */
            if (!(among_var)) goto lab0;
            z->ket = z->c; /* ], line 43 */
            switch(among_var) {
                case 0: goto lab0;
                case 1:
                    slice_from_s(z, 1, "a"); /* <-, line 45 */
                    break;
                case 2:
                    slice_from_s(z, 1, "e"); /* <-, line 47 */
                    break;
                case 3:
                    slice_from_s(z, 1, "i"); /* <-, line 49 */
                    break;
                case 4:
                    slice_from_s(z, 1, "o"); /* <-, line 51 */
                    break;
                case 5:
                    slice_from_s(z, 1, "u"); /* <-, line 53 */
                    break;
                case 6:
                    /* No accented vowel here: step over one character, or
                       end the repeat when the limit has been reached. */
                    if (z->c >= z->l) goto lab0;
                    z->c++; /* next, line 54 */
                    break;
            }
            continue;
        lab0:
            /* The last iteration failed: undo its cursor movement and stop. */
            z->c = c;
            break;
        }
        z->c = c_test;
    }
    {   int c = z->c; /* try, line 57 */
        z->bra = z->c; /* [, line 57 */
        if (!(eq_s(z, 1, "y"))) { z->c = c; goto lab1; }
        z->ket = z->c; /* ], line 57 */
        slice_from_s(z, 1, "Y"); /* <-, line 57 */
    lab1:
        ; /* a label must be followed by a statement before C23 */
    }
    while(1) { /* repeat, line 58 */
        int c = z->c;
        while(1) { /* goto, line 58 */
            int c = z->c; /* start of this attempt; shadows the repeat's c */
            if (!(in_grouping(z, g_v, 97, 138))) goto lab3;
            z->bra = z->c; /* [, line 59 */
            {   int c = z->c; /* or, line 59 */
                /* First alternative: 'i' followed by a vowel. */
                if (!(eq_s(z, 1, "i"))) goto lab5;
                z->ket = z->c; /* ], line 59 */
                if (!(in_grouping(z, g_v, 97, 138))) goto lab5;
                slice_from_s(z, 1, "I"); /* <-, line 59 */
                goto lab4;
            lab5:
                /* Second alternative, tried from just after the vowel: 'y'. */
                z->c = c;
                if (!(eq_s(z, 1, "y"))) goto lab3;
                z->ket = z->c; /* ], line 60 */
                slice_from_s(z, 1, "Y"); /* <-, line 60 */
            }
        lab4:
            /* Match found: leave the cursor where this attempt started. */
            z->c = c;
            break;
        lab3:
            /* No match here: retry one character further on, or give up
               when the limit has been reached. */
            z->c = c;
            if (z->c >= z->l) goto lab2;
            z->c++;
        }
        continue;
    lab2:
        z->c = c;
        break;
    }
    return 1;
}

/*
 * mark_regions (forward mode): computes the region marks p1 (z->I[0]) and
 * p2 (z->I[1]).
 *
 * Both marks start out at the limit z->l. p1 becomes the position just past
 * the first non-vowel that follows a vowel, but never less than 3; p2 is
 * found the same way, continuing from where the search for p1 stopped.
 *
 * Returns 1 when both marks were found, 0 as soon as one of the searches
 * runs into the limit (marks not yet reached keep the value z->l).
 */
static int r_mark_regions(struct SN_env * z) {
    z->I[1] = z->l;
    z->I[0] = z->l;

    /* gopast v, line 69 */
    while (!in_grouping(z, g_v, 97, 138)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    /* gopast non-v, line 69 */
    while (!out_grouping(z, g_v, 97, 138)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    z->I[0] = z->c; /* setmark p1, line 69 */

    /* try, line 70: p1 is at least 3 */
    if (z->I[0] < 3) {
        z->I[0] = 3;
    }

    /* gopast v, line 71 */
    while (!in_grouping(z, g_v, 97, 138)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    /* gopast non-v, line 71 */
    while (!out_grouping(z, g_v, 97, 138)) {
        if (z->c >= z->l) return 0;
        z->c++;
    }
    z->I[1] = z->c; /* setmark p2, line 71 */

    return 1;
}

/*
 * postlude (forward mode): turns the 'Y' and 'I' markers written by
 * r_prelude back into 'y' and 'i'.
 *
 * Starting at the cursor, each position is matched against a_1; a marker is
 * replaced, anything else is stepped over. The loop ends when no further
 * step is possible; the cursor movement of that last, failed iteration is
 * undone. Always returns 1.
 */
static int r_postlude(struct SN_env * z) {
    for (;;) { /* repeat, line 75 */
        int start = z->c;
        int finished = 0;
        int action;

        z->bra = z->c; /* [, line 77 */
        action = find_among(z, a_1, 3); /* substring, line 77 */
        if (action == 0) {
            finished = 1;
        } else {
            z->ket = z->c; /* ], line 77 */
            if (action == 1) {
                slice_from_s(z, 1, "y"); /* <-, line 78 */
            } else if (action == 2) {
                slice_from_s(z, 1, "i"); /* <-, line 79 */
            } else if (action == 3) {
                /* next, line 80 */
                if (z->c >= z->l) {
                    finished = 1;
                } else {
                    z->c++;
                }
            }
        }

        if (finished) {
            z->c = start;
            return 1;
        }
    }
}

/* R1: returns 1 when the cursor is at or beyond the p1 mark (z->I[0]), else 0. */
static int r_R1(struct SN_env * z) {
    return z->I[0] <= z->c;
}

/* R2: returns 1 when the cursor is at or beyond the p2 mark (z->I[1]), else 0. */
static int r_R2(struct SN_env * z) {
    return z->I[1] <= z->c;
}

/*
 * undouble (backward mode): if the text before the cursor ends in "kk",
 * "dd" or "tt" (table a_2), deletes the last of the two letters.
 *
 * Returns 1 after a deletion, 0 when there is no such ending or when the
 * cursor already sits at the backward limit z->lb.
 */
static int r_undouble(struct SN_env * z) {
    const int from_end = z->l - z->c; /* test, line 91 */

    if (find_among_b(z, a_2, 3) == 0) return 0; /* among, line 91 */
    z->c = z->l - from_end; /* the test does not move the cursor */

    z->ket = z->c; /* [, line 91 */
    if (z->c <= z->lb) return 0;
    z->bra = --z->c; /* next and ], line 91 */
    slice_del(z); /* delete, line 91 */
    return 1;
}

/*
 * e_ending (backward mode, step 2): deletes a final 'e' that lies in R1 and
 * is preceded by a non-vowel, then undoubles the ending.
 *
 * The flag e_found (z->B[0]) is cleared on entry and set once the 'e' has
 * been deleted; the "bar" rule of r_standard_suffix reads it.
 *
 * Returns 1 only when the 'e' was deleted and r_undouble also succeeded;
 * otherwise 0 (e_found may still have been set).
 */
static int r_e_ending(struct SN_env * z) {
    int from_end;

    z->B[0] = 0; /* unset e_found, line 95 */

    z->ket = z->c; /* [, line 96 */
    if (!eq_s_b(z, 1, "e")) return 0;
    z->bra = z->c; /* ], line 96 */

    if (!r_R1(z)) return 0; /* call R1, line 96 */

    /* test non-v, line 96: look at the preceding character without moving */
    from_end = z->l - z->c;
    if (!out_grouping_b(z, g_v, 97, 138)) return 0;
    z->c = z->l - from_end;

    slice_del(z); /* delete, line 96 */
    z->B[0] = 1; /* set e_found, line 97 */

    return r_undouble(z) != 0; /* call undouble, line 98 */
}

/*
 * en_ending (backward mode): deletes the current slice (set up by the
 * caller) when the cursor is in R1 and the slice is preceded by a valid
 * en-ending, i.e. a non-vowel and not "gem"; then undoubles the ending.
 *
 * Returns 1 only when the deletion happened and r_undouble also succeeded.
 */
static int r_en_ending(struct SN_env * z) {
    int from_end;

    if (!r_R1(z)) return 0; /* call R1, line 102 */

    /* and, line 102: both conditions are checked from the same position */
    from_end = z->l - z->c;
    if (!out_grouping_b(z, g_v, 97, 138)) return 0;
    z->c = z->l - from_end;

    /* not 'gem', line 102 */
    if (eq_s_b(z, 3, "gem")) return 0;
    z->c = z->l - from_end;

    slice_del(z); /* delete, line 102 */

    return r_undouble(z) != 0; /* call undouble, line 103 */
}

/*
 * standard_suffix (backward mode): runs steps 1, 2, 3a, 3b and 4 of the
 * Dutch algorithm in order.
 *
 * Every step is a Snowball "do": its outcome is ignored, and afterwards the
 * cursor is put back at the same distance from the end of the word
 * (z->c = z->l - m), whatever the step changed. Always returns 1.
 */
static int r_standard_suffix(struct SN_env * z) {
    int among_var;
    /* Step 1: longest of heden / en, ene / s, se (table a_3). */
    {   int m = z->l - z->c; /* do, line 107 */
        z->ket = z->c; /* [, line 108 */
        among_var = find_among_b(z, a_3, 5); /* substring, line 108 */
        if (!(among_var)) goto lab0;
        z->bra = z->c; /* ], line 108 */
        switch(among_var) {
            case 0: goto lab0;
            case 1:
                /* heden: replace with heid if in R1 */
                if (!r_R1(z)) goto lab0; /* call R1, line 110 */
                slice_from_s(z, 4, "heid"); /* <-, line 110 */
                break;
            case 2:
                /* en, ene */
                if (!r_en_ending(z)) goto lab0; /* call en_ending, line 113 */
                break;
            case 3:
                /* s, se: delete if in R1 and preceded by a non-vowel other than j */
                if (!r_R1(z)) goto lab0; /* call R1, line 116 */
                if (!(out_grouping_b(z, g_v_j, 97, 138))) goto lab0;
                slice_del(z); /* delete, line 116 */
                break;
        }
    lab0:
        z->c = z->l - m;
    }
    /* Step 2: final e; this also (re)sets the e_found flag z->B[0]. */
    {   int m = z->l - z->c; /* do, line 120 */
        if (!r_e_ending(z)) goto lab1; /* call e_ending, line 120 */
    lab1:
        z->c = z->l - m;
    }
    /* Step 3a: delete heid if in R2 and not preceded by c, then treat a
       preceding en as in step 1. */
    {   int m = z->l - z->c; /* do, line 122 */
        z->ket = z->c; /* [, line 122 */
        if (!(eq_s_b(z, 4, "heid"))) goto lab2;
        z->bra = z->c; /* ], line 122 */
        if (!r_R2(z)) goto lab2; /* call R2, line 122 */
        {   int m = z->l - z->c; /* not, line 122 */
            if (!(eq_s_b(z, 1, "c"))) goto lab3;
            goto lab2;
        lab3:
            z->c = z->l - m;
        }
        slice_del(z); /* delete, line 122 */
        z->ket = z->c; /* [, line 123 */
        if (!(eq_s_b(z, 2, "en"))) goto lab2;
        z->bra = z->c; /* ], line 123 */
        if (!r_en_ending(z)) goto lab2; /* call en_ending, line 123 */
    lab2:
        z->c = z->l - m;
    }
    /* Step 3b: longest of end, ing / ig / lijk / baar / bar (table a_4). */
    {   int m = z->l - z->c; /* do, line 126 */
        z->ket = z->c; /* [, line 127 */
        among_var = find_among_b(z, a_4, 6); /* substring, line 127 */
        if (!(among_var)) goto lab4;
        z->bra = z->c; /* ], line 127 */
        switch(among_var) {
            case 0: goto lab4;
            case 1:
                /* end, ing: delete if in R2; then either delete a preceding
                   ig (in R2, not preceded by e) or undouble the ending */
                if (!r_R2(z)) goto lab4; /* call R2, line 129 */
                slice_del(z); /* delete, line 129 */
                {   int m = z->l - z->c; /* or, line 130 */
                    z->ket = z->c; /* [, line 130 */
                    if (!(eq_s_b(z, 2, "ig"))) goto lab6;
                    z->bra = z->c; /* ], line 130 */
                    if (!r_R2(z)) goto lab6; /* call R2, line 130 */
                    {   int m = z->l - z->c; /* not, line 130 */
                        if (!(eq_s_b(z, 1, "e"))) goto lab7;
                        goto lab6;
                    lab7:
                        z->c = z->l - m;
                    }
                    slice_del(z); /* delete, line 130 */
                    goto lab5;
                lab6:
                    /* ig alternative failed: back to the position after the
                       first deletion and try undouble instead */
                    z->c = z->l - m;
                    if (!r_undouble(z)) goto lab4; /* call undouble, line 130 */
                }
            lab5:
                break;
            case 2:
                /* ig: delete if in R2 and not preceded by e */
                if (!r_R2(z)) goto lab4; /* call R2, line 133 */
                {   int m = z->l - z->c; /* not, line 133 */
                    if (!(eq_s_b(z, 1, "e"))) goto lab8;
                    goto lab4;
                lab8:
                    z->c = z->l - m;
                }
                slice_del(z); /* delete, line 133 */
                break;
            case 3:
                /* lijk: delete if in R2, then repeat step 2 */
                if (!r_R2(z)) goto lab4; /* call R2, line 136 */
                slice_del(z); /* delete, line 136 */
                if (!r_e_ending(z)) goto lab4; /* call e_ending, line 136 */
                break;
            case 4:
                /* baar: delete if in R2 */
                if (!r_R2(z)) goto lab4; /* call R2, line 139 */
                slice_del(z); /* delete, line 139 */
                break;
            case 5:
                /* bar: delete if in R2 and the e_found flag is set */
                if (!r_R2(z)) goto lab4; /* call R2, line 142 */
                if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */
                slice_del(z); /* delete, line 142 */
                break;
        }
    lab4:
        z->c = z->l - m;
    }
    /* Step 4: if the word ends non-vowel + double a/e/o/u + non-vowel other
       than I, remove one of the two vowels. */
    {   int m = z->l - z->c; /* do, line 146 */
        if (!(out_grouping_b(z, g_v_I, 73, 138))) goto lab9;
        {   int m_test = z->l - z->c; /* test, line 148 */
            if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */
            if (!(out_grouping_b(z, g_v, 97, 138))) goto lab9;
            z->c = z->l - m_test;
        }
        z->ket = z->c; /* [, line 152 */
        if (z->c <= z->lb) goto lab9;
        z->c--; /* next, line 152 */
        z->bra = z->c; /* ], line 152 */
        slice_del(z); /* delete, line 152 */
    lab9:
        z->c = z->l - m;
    }
    return 1;
}

/*
 * Entry point of the Dutch stemmer: stems the word held in z in place.
 *
 * Runs prelude and mark_regions forwards, standard_suffix backwards (with
 * the backward limit z->lb set to the starting cursor), and finally postlude
 * forwards again. The result of each routine is ignored and the cursor is
 * restored after each of them. Always returns 1.
 */
extern int dutch_stem(struct SN_env * z) {
    int saved;
    int from_end;

    /* do prelude, line 159 */
    saved = z->c;
    (void) r_prelude(z);
    z->c = saved;

    /* do mark_regions, line 160 */
    saved = z->c;
    (void) r_mark_regions(z);
    z->c = saved;

    /* backwards, line 161 */
    z->lb = z->c;
    z->c = z->l;

    /* do standard_suffix, line 162 */
    from_end = z->l - z->c;
    (void) r_standard_suffix(z);
    z->c = z->l - from_end;

    /* back to forward mode */
    z->c = z->lb;

    /* do postlude, line 163 */
    saved = z->c;
    (void) r_postlude(z);
    z->c = saved;

    return 1;
}

/*
 * Creates the environment used by dutch_stem by calling
 * SN_create_env(0, 2, 1). The stemmer uses two integers (z->I[0], z->I[1])
 * and one boolean (z->B[0]) -- presumably what the arguments 2 and 1 request;
 * confirm against SN_create_env.
 */
extern struct SN_env * dutch_create_env(void)
{
    return SN_create_env(0, 2, 1);
}

/* Hands the environment z to SN_close_env(). */
extern void dutch_close_env(struct SN_env * z)
{
    SN_close_env(z);
}



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/output.txt === (45569/45669 lines abridged)
a
a
…
aa
aach
aachener
aah
aalborg
aalder
aalmoez
aalscholver
aalscholver
aalscholverstand
aalsmer
aalsmeerban
aalsmer
aalt
aaltenar
aaltjes
aam
aan
aan
aanbad
aanbaklaagjes
aanbeld
aanbell
aanbested
aanbested
aanbested
aanbestedingsdossier
aanbestedingsprocedur
aanbestedingsrichtlijn
aanbevel
aanbevel
aanbevel
aanbevol
aanbid
aanbid
aanbid
aanbid
aanbid
aanbied
aanbieder
aanbieder
aanbiedfrequentie
aanbiedingsbrief
aanbiedplaats
aanbiedt
aanblik
aanbod

[-=- -=- -=- 45569 lines omitted -=- -=- -=-]

zwemwater
zwengel
zwenkwiel
zwep
zwepend
zwer
zwerfafval
zwerftocht
zwerfvuil
zwerm
zwet
zwetslot
zwev
zwevend
zwevend
zweving
zwiepend
zwiept
zwiept
zwier
zwierd
zwier
zwierf
zwijg
zwijgend
zwijgend
zwijggeld
zwijgt
zwijmd
zwijmd
zwijn
zwijn
zwikkend
zwinpolder
zwitser
zwitserland
zwitser
zwitser
zwo
zwoel
zwoeld
zwoel
zwoelt
zwoer
zwoet
zwol
zwoll
zwoll
zwol
zwom


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/stem.h ===

/* Public interface of the Dutch stemmer (implemented in dutchstem.c). */

/* Returns the environment obtained from SN_create_env(0, 2, 1). */
extern struct SN_env * dutch_create_env(void);
/* Passes z to SN_close_env(). */
extern void dutch_close_env(struct SN_env * z);

/* Stems the word held in z in place; the implementation always returns 1. */
extern int dutch_stem(struct SN_env * z);



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/stem.sbl ===
routines (
           prelude postlude
           e_ending
           en_ending
           mark_regions
           R1 R2
           undouble
           standard_suffix
)

externals ( stem )

booleans ( e_found )

integers ( p1 p2 )

groupings ( v v_I v_j )

stringescapes {}

/* special characters (in MS-DOS Latin I; the hex codes below are DOS
   code-page values, not ISO Latin-1) */

stringdef a"   hex '84'
stringdef e"   hex '89'
stringdef i"   hex '8B'
stringdef o"   hex '94'
stringdef u"   hex '81'

stringdef a'   hex 'A0'
stringdef e'   hex '82'
stringdef i'   hex 'A1'
stringdef o'   hex 'A2'
stringdef u'   hex 'A3'

stringdef e`   hex '8A'

define v       'aeiouy{e`}'
define v_I     v + 'I'
define v_j     v + 'j'

// prelude: normalise the word before the regions are marked.
//   - replace every umlaut/acute accented vowel by the plain vowel
//   - put an initial y into upper case
//   - put i between vowels, and y after a vowel, into upper case
define prelude as (
    // 'test' restores the cursor after the whole word has been scanned
    test repeat (
        [substring] among(
            '{a"}' '{a'}'
                (<- 'a')
            '{e"}' '{e'}'
                (<- 'e')
            '{i"}' '{i'}'
                (<- 'i')
            '{o"}' '{o'}'
                (<- 'o')
            '{u"}' '{u'}'
                (<- 'u')
        ) or next
    )
    try(['y'] <- 'Y')
    repeat goto (
        v [('i'] v <- 'I') or
           ('y']   <- 'Y')
    )
)

// mark_regions: set p1 and p2, the start positions of regions R1 and R2.
// Both default to the end of the word; each is the position after the first
// non-vowel that follows a vowel, p2 being searched for from where the
// search for p1 stopped.
define mark_regions as (

    $p1 = limit
    $p2 = limit

    gopast v  gopast non-v  setmark p1
    try($p1 < 3  $p1 = 3)  // at least 3
    gopast v  gopast non-v  setmark p2

)

// postlude: turn the I and Y markers written by prelude back into lower case.
define postlude as repeat (

    [substring] among(
        'Y'  (<- 'y')
        'I'  (<- 'i')
    ) or next

)

// The routines below work from the end of the word towards its start.
backwardmode (

    // R1 / R2: true when the cursor is at or after p1 / p2
    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    // undouble: remove the last letter if the word ends kk, dd or tt
    define undouble as (
        test among('kk' 'dd' 'tt') [next] delete
    )

    // e_ending (step 2): delete a final e if in R1 and preceded by a
    // non-vowel, record that in e_found, then undouble the ending
    define e_ending as (
        unset e_found
        ['e'] R1 test non-v delete
        set e_found
        undouble
    )

    // en_ending: delete the current slice if in R1 and preceded by a valid
    // en-ending (a non-vowel, and not 'gem'), then undouble the ending
    define en_ending as (
        R1 non-v and not 'gem' delete
        undouble
    )

    define standard_suffix as (
        // step 1
        do (
            [substring] among(
                'heden'
                (   R1 <- 'heid'
                )
                'en' 'ene'
                (   en_ending
                )
                's' 'se'
                (   R1 non-v_j delete
                )
            )
        )
        // step 2
        do e_ending

        // step 3a
        do ( ['heid'] R2 not 'c' delete
             ['en'] en_ending
           )

        // step 3b
        do (
            [substring] among(
                'end' 'ing'
                (   R2 delete
                    (['ig'] R2 not 'e' delete) or undouble
                )
                'ig'
                (   R2 not 'e' delete
                )
                'lijk'
                (   R2 delete e_ending
                )
                'baar'
                (   R2 delete
                )
                'bar'
                (   R2 e_found delete
                )
            )
        )
        // step 4: undouble the vowel in a final
        // non-vowel + aa/ee/oo/uu + non-vowel (other than I)
        do (
            non-v_I
            test (
                among ('aa' 'ee' 'oo' 'uu')
                non-v
            )
            [next] delete
        )
    )
)

// stem: the external entry point. prelude, mark_regions and postlude run
// forwards; standard_suffix runs backwards from the end of the word.
define stem as (

        do prelude
        do mark_regions
        backwards
            do standard_suffix
        do postlude
)


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/stemmer.html ===

<HTML>
<HEAD>
<TITLE>Dutch stemming algorithm</TITLE></HEAD>
<BODY BGCOLOR=WHITE>
<TABLE WIDTH=75% ALIGN=CENTER COLS=1>
<H1 ALIGN=CENTER>Dutch stemming algorithm</H1>

<TR><TD>
<BR>&nbsp;<H2>Links to resources</H2>

<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="stem.sbl">    The stemmer in Snowball</A>
<TR><TD><A HREF="stem.c">      The ANSI C stemmer</A>
<TR><TD><A HREF="stem.h">      - and its header</A>
<TR><TD><A HREF="voc.txt">     Sample Dutch vocabulary (ISO Latin codings)</A>
<TR><TD><A HREF="output.txt">  Its stemmed equivalent</A>
<TR><TD><A HREF="diffs.txt">   Vocabulary + stemmed equivalent in pure ASCII</A>
<TR><TD><A HREF="tarball.tgz"> Tar-gzipped file of all of the above</A>
</TABLE></DL>

<DL><DD><TABLE CELLPADDING=0>
<TR><TD><A HREF="../texts/germanic.html">
                  Germanic language stemmers</A>
</TABLE></DL>

</TR>

<TR><TD BGCOLOR="lightpink">

<BR><BR>

Here is a sample of Dutch vocabulary, with the stemmed forms that will
be generated with this algorithm.

<BR><BR>



<DL><DD><TABLE CELLPADDING=0>
<TR><TD>  <B>word</B> </TD>
 <TD></TD><TD> </TD>
 <TD></TD><TD> <B>stem</B> </TD>
 <TD></TD><TD>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</TD>
 <TD></TD><TD> <B>word</B> </TD>
 <TD></TD><TD> </TD>
 <TD></TD><TD> <B>stem</B> </TD>
</TR>

<TR><TD>
lichaamsziek<BR>
lichamelijk<BR>
lichamelijke<BR>
lichamelijkheden<BR>
lichamen<BR>
lichere<BR>
licht<BR>
lichtbeeld<BR>
lichtbruin<BR>
lichtdoorlatende<BR>
lichte<BR>
lichten<BR>
lichtende<BR>
lichtenvoorde<BR>
lichter<BR>
lichtere<BR>
lichters<BR>
lichtgevoeligheid<BR>
lichtgewicht<BR>
lichtgrijs<BR>
lichthoeveelheid<BR>
lichtintensiteit<BR>
lichtje<BR>
lichtjes<BR>
lichtkranten<BR>
lichtkring<BR>
lichtkringen<BR>
lichtregelsystemen<BR>
lichtste<BR>
lichtstromende<BR>
lichtte<BR>
lichtten<BR>
lichttoetreding<BR>
lichtverontreinigde<BR>
lichtzinnige<BR>
lid<BR>
lidia<BR>
lidmaatschap<BR>
lidstaten<BR>
lidvereniging<BR>
</TD>
<TD></TD><TD> &nbsp;<TT><B> => </B></TT>&nbsp; </TD>
<TD></TD><TD>
lichaamsziek<BR>
licham<BR>
licham<BR>
licham<BR>
licham<BR>
licher<BR>
licht<BR>
lichtbeeld<BR>
lichtbruin<BR>
lichtdoorlat<BR>
licht<BR>
licht<BR>
lichtend<BR>
lichtenvoord<BR>
lichter<BR>
lichter<BR>
lichter<BR>
lichtgevoel<BR>
lichtgewicht<BR>
lichtgrijs<BR>
lichthoevel<BR>
lichtintensiteit<BR>
lichtj<BR>
lichtjes<BR>
lichtkrant<BR>
lichtkring<BR>
lichtkring<BR>
lichtregelsystem<BR>
lichtst<BR>
lichtstrom<BR>
licht<BR>
licht<BR>
lichttoetred<BR>
lichtverontreinigd<BR>
lichtzinn<BR>
lid<BR>
lidia<BR>
lidmaatschap<BR>
lidstat<BR>
lidveren<BR>
</TD>
<TD></TD><TD> </TD>
<TD></TD><TD>
opgingen<BR>
opglanzing<BR>
opglanzingen<BR>
opglimlachten<BR>
opglimpen<BR>
opglimpende<BR>
opglimping<BR>
opglimpingen<BR>
opgraven<BR>
opgrijnzen<BR>
opgrijzende<BR>
opgroeien<BR>
opgroeiende<BR>
opgroeiplaats<BR>
ophaal<BR>
ophaaldienst<BR>
ophaalkosten<BR>
ophaalsystemen<BR>
ophaalt<BR>
ophaaltruck<BR>
ophalen<BR>
ophalend<BR>
ophalers<BR>
ophef<BR>
opheffen<BR>
opheffende<BR>
opheffing<BR>
opheldering<BR>
ophemelde<BR>
ophemelen<BR>
opheusden<BR>
ophief<BR>
ophield<BR>
ophieven<BR>
ophoepelt<BR>
ophoog<BR>
ophoogzand<BR>
ophopen<BR>
ophoping<BR>
ophouden<BR>
</TD>
<TD></TD><TD> &nbsp;<TT><B> => </B></TT>&nbsp; </TD>
<TD></TD><TD>
opging<BR>
opglanz<BR>
opglanz<BR>
opglimlacht<BR>
opglimp<BR>
opglimp<BR>
opglimp<BR>
opglimp<BR>
opgrav<BR>
opgrijnz<BR>
opgrijz<BR>
opgroei<BR>
opgroei<BR>
opgroeiplat<BR>
ophal<BR>
ophaaldienst<BR>
ophaalkost<BR>
ophaalsystem<BR>
ophaalt<BR>
ophaaltruck<BR>
ophal<BR>
ophal<BR>
ophaler<BR>
ophef<BR>
opheff<BR>
opheff<BR>
opheff<BR>
ophelder<BR>
ophemeld<BR>
ophemel<BR>
opheusd<BR>
ophief<BR>
ophield<BR>
ophiev<BR>
ophoepelt<BR>
ophog<BR>
ophoogzand<BR>
ophop<BR>
ophop<BR>
ophoud<BR>
</TD>
</TR>
</TABLE></DL>


</TR>

<TR><TD>

<BR><BR>
<BR>&nbsp;<H2>The stemming algorithm</H2>

Dutch includes the following accented forms
<DL><DD>
    <B><I>&auml;  &nbsp;  &euml;  &nbsp;  &iuml;  &nbsp;  &ouml;  &nbsp;  &uuml;  &nbsp;  &aacute;  &nbsp;  &eacute;  &nbsp;  &iacute;  &nbsp;  &oacute;  &nbsp;  &uacute;  &nbsp;  &egrave;</I></B>
</DL>
First, remove all umlaut and acute accents. A vowel is then one of,
<DL><DD>
    <B><I>a  &nbsp;  e  &nbsp;  i  &nbsp;  o  &nbsp;  u  &nbsp;  y  &nbsp;  &egrave;</I></B>
</DL>
Put initial <B><I>y</I></B>, and <B><I>y</I></B> and <B><I>i</I></B> between vowels into upper case. <I>R</I>1 and
<I>R</I>2
(see the <A HREF="../texts/r1r2.html"> note</A> on <I>R</I>1 and <I>R</I>2)
are then defined as in German.
<BR><BR>
Define a valid <B><I>s</I></B>-ending as a non-vowel other than <B><I>j</I></B>.
<BR><BR>
Define a valid <B><I>en</I></B>-ending as a non-vowel, and not <B><I>gem</I></B>.
<BR><BR>
Define undoubling the ending as removing the last letter if the word ends
<B><I>kk</I></B>, <B><I>dd</I></B> or <B><I>tt</I></B>.
<BR><BR>
Do each of steps 1, 2, 3 and 4.
<BR><BR>
Step 1:
<DL><DD>
    Search for the longest among the following suffixes, and perform the
    action indicated
<BR><BR>
<DL>
    <DT>(<I>a</I>) <B><I>heden</I></B>
        <DD>replace with <B><I>heid</I></B> if in <I>R</I>1
<BR><BR>
    <DT>(<I>b</I>) <B><I>en  &nbsp;  ene</I></B>
        <DD>delete if in <I>R</I>1 and preceded by a valid <B><I>en</I></B>-ending, and then
        undouble the ending
<BR><BR>
    <DT>(<I>c</I>) <B><I>s  &nbsp;  se</I></B>
        <DD>delete if in <I>R</I>1 and preceded by a valid <B><I>s</I></B>-ending
</DL>
</DL>
Step 2:
<DL><DD>
    Delete suffix <B><I>e</I></B> if in <I>R</I>1 and preceded by a non-vowel, and then undouble
    the ending
</DL>
Step 3a: <B><I>heid</I></B>
<DL><DD>
    delete <B><I>heid</I></B> if in <I>R</I>2 and not preceded by <B><I>c</I></B>, and treat a preceding
    <B><I>en</I></B> as in step 1(<I>b</I>)
</DL>
Step 3b: <I>d</I>-suffixes
<DL><DD>
    Search for the longest among the following suffixes, and perform the
    action indicated.
<BR><BR>
<DL>
    <DT><B><I>end  &nbsp;  ing</I></B>
        <DD>delete if in <I>R</I>2
        <DD>if preceded by <B><I>ig</I></B>, delete if in <I>R</I>2 and not preceded by <B><I>e</I></B>, otherwise
        undouble the ending
<BR><BR>
    <DT><B><I>ig</I></B>
        <DD>delete if in <I>R</I>2 and not preceded by <B><I>e</I></B>
<BR><BR>
    <DT><B><I>lijk</I></B>
        <DD>delete if in <I>R</I>2, and then repeat step 2
<BR><BR>
    <DT><B><I>baar</I></B>
        <DD>delete if in <I>R</I>2
<BR><BR>
    <DT><B><I>bar</I></B>
        <DD>delete if in <I>R</I>2 and if step 2 actually removed an <B><I>e</I></B>
</DL>
</DL>
Step 4: undouble vowel
<DL><DD>
    If the word ends <I>CVD</I>, where <I>C</I> is a non-vowel, <I>D</I> is a non-vowel other
    than <B><I>I</I></B>, and <I>V</I> is double <B><I>a</I></B>, <B><I>e</I></B>, <B><I>o</I></B> or <B><I>u</I></B>, remove one of the vowels from
    <I>V</I> (for example, <I>maan</I> <TT>-&gt;</TT> <I>man</I>, <I>brood</I> <TT>-&gt;</TT> <I>brod</I>).
</DL>
Finally,
<DL><DD>
    Turn <B><I>I</I></B> and <B><I>Y</I></B> back into lower case.
</DL>


</TR>

<TR><TD BGCOLOR="lightblue">

<BR>&nbsp;<H2>The same algorithm in Snowball</H2>

<FONT SIZE=-1><PRE>
<DL><DD>
routines (
           prelude postlude
           e_ending
           en_ending
           mark_regions
           R1 R2
           undouble
           standard_suffix
)

externals ( stem )

booleans ( e_found )

integers ( p1 p2 )

groupings ( v v_I v_j )

stringescapes {}

/* special characters (in MS-DOS Latin I; the hex codes below are DOS
   code-page values, not ISO Latin-1) */

stringdef a"   hex '84'
stringdef e"   hex '89'
stringdef i"   hex '8B'
stringdef o"   hex '94'
stringdef u"   hex '81'

stringdef a'   hex 'A0'
stringdef e'   hex '82'
stringdef i'   hex 'A1'
stringdef o'   hex 'A2'
stringdef u'   hex 'A3'

stringdef e`   hex '8A'

define v       'aeiouy{e`}'
define v_I     v + 'I'
define v_j     v + 'j'

define prelude as (
    test repeat (
        [substring] among(
            '{a"}' '{a'}'
                (<- 'a')
            '{e"}' '{e'}'
                (<- 'e')
            '{i"}' '{i'}'
                (<- 'i')
            '{o"}' '{o'}'
                (<- 'o')
            '{u"}' '{u'}'
                (<- 'u')
        ) or next
    )
    try(['y'] <- 'Y')
    repeat goto (
        v [('i'] v <- 'I') or
           ('y']   <- 'Y')
    )
)

define mark_regions as (

    $p1 = limit
    $p2 = limit

    gopast v  gopast non-v  setmark p1
    try($p1 < 3  $p1 = 3)  // at least 3
    gopast v  gopast non-v  setmark p2

)

define postlude as repeat (

    [substring] among(
        'Y'  (<- 'y')
        'I'  (<- 'i')
    ) or next

)

backwardmode (

    define R1 as $p1 <= cursor
    define R2 as $p2 <= cursor

    define undouble as (
        test among('kk' 'dd' 'tt') [next] delete
    )

    define e_ending as (
        unset e_found
        ['e'] R1 test non-v delete
        set e_found
        undouble
    )

    define en_ending as (
        R1 non-v and not 'gem' delete
        undouble
    )

    define standard_suffix as (
        do (
            [substring] among(
                'heden'
                (   R1 <- 'heid'
                )
                'en' 'ene'
                (   en_ending
                )
                's' 'se'
                (   R1 non-v_j delete
                )
            )
        )
        do e_ending

        do ( ['heid'] R2 not 'c' delete
             ['en'] en_ending
           )

        do (
            [substring] among(
                'end' 'ing'
                (   R2 delete
                    (['ig'] R2 not 'e' delete) or undouble
                )
                'ig'
                (   R2 not 'e' delete
                )
                'lijk'
                (   R2 delete e_ending
                )
                'baar'
                (   R2 delete
                )
                'bar'
                (   R2 e_found delete
                )
            )
        )
        do (
            non-v_I
            test (
                among ('aa' 'ee' 'oo' 'uu')
                non-v
            )
            [next] delete
        )
    )
)

define stem as (

        do prelude
        do mark_regions
        backwards
            do standard_suffix
        do postlude
)
</DL>
</PRE></FONT>
</TABLE>
</BODY>
</HTML>


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/src/PyStemmer/dutch/voc.txt === (45569/45669 lines abridged)
a
 
…
aa
aachen
aachener
aah
aalborg
aalders
aalmoezen
aalscholver
aalscholvers
aalscholverstand
aalsmeer
aalsmeerbaan
aalsmeerse
aalten
aaltenaar
aaltjes
aam
aan
 an
aanbad
aanbaklaagjes
aanbelde
aanbellen
aanbesteden
aanbesteding
aanbestedingen
aanbestedingsdossier
aanbestedingsprocedure
aanbestedingsrichtlijn
aanbevelen
aanbeveling
aanbevelingen
aanbevolen
aanbiddelijk
aanbiddelijke
aanbidden
aanbiddend
aanbidding
aanbieden
aanbieder
aanbieders
aanbiedfrequentie
aanbiedingsbrief
aanbiedplaatsen
aanbiedt
aanblik
aanbod

[-=- -=- -=- 45569 lines omitted -=- -=- -=-]

zwemwater
zwengelen
zwenkwielen
zwepen
zwepende
zweren
zwerfafval
zwerftocht
zwerfvuil
zwermen
zweten
zwetsloot
zweven
zwevend
zwevende
zweving
zwiepende
zwiepte
zwiepten
zwier
zwierden
zwieren
zwierf
zwijgen
zwijgend
zwijgende
zwijggeld
zwijgt
zwijmde
zwijmden
zwijn
zwijnen
zwikkende
zwinpolder
zwitser
zwitserland
zwitsers
zwitserse
zwo
zwoel
zwoelde
zwoele
zwoelte
zwoer
zwoete
zwol
zwolle
zwollen
zwolse
zwom