[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser - TextIndexGgen.py:1.1.2.1 kjParseBuild.py:1.1.2.1 kjParser.py:1.1.2.1 kjSet.py:1.1.2.1 Makefile:NONE
Andreas Jung
andreas@digicool.com
Mon, 14 Jan 2002 15:00:41 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser
In directory cvs.zope.org:/tmp/cvs-serv19985
Added Files:
Tag: ajung-textindexng-branch
TextIndexGgen.py kjParseBuild.py kjParser.py kjSet.py
Removed Files:
Tag: ajung-textindexng-branch
Makefile
Log Message:
added new parser using kwParsing
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/TextIndexGgen.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Grammer for TextIndex queries """
REGENERATEONLOAD = 1
import string
GRAMMARSTRING ="""
Value :: ## indicates Value is the root nonterminal for the grammar
@R BaseRule :: Value >> Term
@R Braces1Rule :: Term >> openp Term closep
@R Braces2Rule :: Term >> openp Term closep Term
@R DoubleTermRule :: Term >> Term Term
@R AndTermRule :: Term >> Term and Term
@R OrTermRule :: Term >> Term or Term
@R Strrule :: Term >> str
"""
COMPILEDFILENAME = "TextIndexG.py"
MARSHALLEDFILENAME = "TextIndexG.mar"
OPENPREGEX = r'('
CLOSEPREGEX = r')'
ANDREGEX = 'and'
ORREGEX = 'or'
STRREGEX = '[a-zA-Z]*'
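# Note: kjParser compiles these patterns with the old Emacs-style "regex"
# module, where '(' and ')' match literal parentheses (grouping would be
# written '\(' ... '\)'), so OPENPREGEX/CLOSEPREGEX need no escaping.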
### declare interpretation functions and regex's for terminals
class Stack:
    def __init__(self):
        self.lst = []
    def push(self, item):
        self.lst.append(item)
    def pop(self):
        item = self.lst[-1]
        self.lst = self.lst[:-1]
        return item

class Collector(Stack):
    def __init__(self, default_op='and'):
        Stack.__init__(self)
        self._current = []
        self._op = []
        self._default_op = default_op
        self._result = ''
    def addWord(self, word):
        self._current.append('LL("%s")' % word)
    def setOp(self, op):
        self._op = op
    def newLevel(self):
        # push the current level onto the stack
        self.push((self._current, self._op))
        self._current = []
    def endLevel(self):
        if not self._op:
            op = self._default_op
        else:
            op = self._op
        if self._current:
            r = ','.join(self._current)
            r = "%s(%s)" % (op, r)
            # get the top element from the stack
            topEl, topOp = self.pop()
            topEl.append(r)
            self.push((topEl, topOp))
            self._current = []
        else:
            # merge the two topmost elements of the stack
            topEl, topOp = self.pop()
            top1El, top1Op = self.pop()
            r = ','.join(topEl)
            r = "%s(%s)" % (topOp, r)
            top1El.append(r)
            self.push((top1El, top1Op))
    def __str__(self):
        return "Collector: %s %s" % (self._current, self._op)
    __repr__ = __str__
    def getResult(self):
        if self._current:
            # fall back to the default operator when none was seen
            # (mirrors the defaulting done in endLevel)
            op = self._op or self._default_op
            r = ','.join(self._current)
            r = "%s(%s)" % (op, r)
            return r
        else:
            topEl, topOp = self.pop()
            op = topOp or self._default_op
            r = ','.join(topEl)
            r = "%s(%s)" % (op, r)
            return r
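# Illustrative usage of the Collector alone (assumed call sequence; during
# a real parse the terminal callbacks below drive it):
#   C = Collector()
#   C.addWord('a'); C.addWord('b'); C.setOp('or')
#   C.getResult()   # -> 'or(LL("a"),LL("b"))'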
def openParens(x):
    C.newLevel()

def closeParens(x):
    C.endLevel()

def wordFound(word):
    C.addWord(word)
    return word

def operandToken(op):
    C.setOp(op)

def DeclareTerminals(Grammar):
    Grammar.Addterm("and", ANDREGEX, operandToken)
    Grammar.Addterm("or", ORREGEX, operandToken)
    Grammar.Addterm("openp", OPENPREGEX, openParens)
    Grammar.Addterm("closep", CLOSEPREGEX, closeParens)
    Grammar.Addterm("str", STRREGEX, wordFound)

def BindRules(Grammar):
    pass
# This function generates the grammar and dumps it to a file.
def GrammarBuild():
    import kjParseBuild
    TextIndexG = kjParseBuild.NullCGrammar()
    TextIndexG.SetCaseSensitivity(0)  # grammar is not case sensitive for keywords
    DeclareTerminals(TextIndexG)
    # TextIndexG.Keywords("and or")
    # TextIndexG.punct("()")
    TextIndexG.Nonterms("Value Term")
    TextIndexG.Declarerules(GRAMMARSTRING)
    TextIndexG.Compile()
    print "dumping as python to " + COMPILEDFILENAME
    outfile = open(COMPILEDFILENAME, "w")
    TextIndexG.Reconstruct("TextIndexG", outfile, "GRAMMAR")
    outfile.close()
    print "dumping as binary to " + MARSHALLEDFILENAME
    outfile = open(MARSHALLEDFILENAME, "w")
    TextIndexG.MarshalDump(outfile)
    outfile.close()
    BindRules(TextIndexG)
    return TextIndexG
# this function initializes the compiled grammar from the generated file.
def LoadTextIndexG():
    import TextIndexG
    # reload to make sure we get the most recent version!
    # (only needed when debugging the grammar).
    reload(TextIndexG)
    TextIndexG = TextIndexG.GRAMMAR()
    DeclareTerminals(TextIndexG)
    BindRules(TextIndexG)
    return TextIndexG
def unMarshalTextIndexG():
    import kjParser
    infile = open(MARSHALLEDFILENAME, "r")
    TextIndexG = kjParser.UnMarshalGram(infile)
    infile.close()
    DeclareTerminals(TextIndexG)
    BindRules(TextIndexG)
    return TextIndexG
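# A possible loading strategy (illustrative sketch only, not used by this
# module): prefer the fast marshalled dump and fall back to a full rebuild.
#
# def getGrammar():
#     try:
#         return unMarshalTextIndexG()
#     except IOError:
#         return GrammarBuild()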
########## test the grammar generation
if REGENERATEONLOAD:
    print "(re)generating the TextIndexG grammar in file TextIndexG.py"
    Dummy = GrammarBuild()
    print "(re)generation done."
if __name__ == '__main__':
    print "loading grammar as python"
    TextIndexG = LoadTextIndexG()
    Context = {}
    for item in ['a', '(a)', '(a) (b)', '(a b)', '(a b)(c or d) ',
                 '(a and b) or (c and dd)', 'a and (b or c or (x and y))']:
        print '-' * 78
        C = Collector()
        print item
        test = TextIndexG.DoParse1(item, Context)
        print 'res:', C.getResult()
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParseBuild.py === (1229/1329 lines abridged)
#
# python code for building a parser from a grammar
# Copyright Aaron Robert Watters, 1994
#
# BUGS:
# A bad grammar that has no derivations for
# the root nonterminal may cause a name error
# on the variable "GoodStartingPlace"
# this needs to be modified so the RULEGRAM is loaded from a
# compiled representation if available.
import string
import kjSet
import kjParser
import regex
# import some constants
from kjParser import \
    TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, TRANSFLAG, KEYFLAG, \
    NONTERMFLAG, EOFFLAG, ENDOFFILETOKEN
PMODULE = kjParser.THISMODULE
# errors raised here
TokenError = "TokenError" # may happen on autogen with bad grammar
NotSLRError = "NotSLRError" # may happen for nonSLR grammar
# set this flag for regression testing at each load
RUNTESTS = 0
# set this flag to abort automatic generation on Errors
ABORTONERROR = 0
# token used to mark null productions
NULLTOKEN = (None,None)
# a derived FSM class, with closure computation methods defined
# (compilable FSMachine)
#
class CFSMachine(kjParser.FSMachine):
    def __init__(self, nonterm):
        kjParser.FSMachine.__init__(self, nonterm)

    # return the epsilon closure of the FSM as a new FSM
    #
    # DoNullMap, if set, will map unexpected tokens to
    # the "empty" state (usually creating a really big fsm)
[-=- -=- -=- 1229 lines omitted -=- -=- -=-]
RX = kjParser.ParseRule( X, [ oppar, Y, clpar ] )
RY = kjParser.ParseRule( Y, [] )
rl2 = [RX,RY]
rs2 = ruleset(X, rl2)
rs2.CompFirst()
rs2.CompFollow()
rs2.CompSLRNFA()
rs2.CompDFA()
rs2.SLRFixDFA()
DFA2 = rs2.DFA
ttt2 = dummy()
def TESTDFA2( STRING, DOREDUCTIONS = 1):
    return TESTDFA( STRING, ttt2, DFA2, rl2, DOREDUCTIONS )
# the following grammar should fail to be slr
# (Aho,Ullman p. 213)
S = kjParser.nonterminal("S")
L = kjParser.nonterminal("L")
R = kjParser.nonterminal("R")
RS1 = kjParser.ParseRule( S, [L, equals, R] )
RS2 = kjParser.ParseRule( S, [R], echo )
RL1 = kjParser.ParseRule( L, [star, R])
RL2 = kjParser.ParseRule( L, [id])
RR1 = kjParser.ParseRule( R, [L] )
rs3 = ruleset(S, [RS1,RS2,RL1,RL2,RR1])
rs3.CompFirst()
rs3.CompFollow()
rs3.CompSLRNFA()
rs3.CompDFA()
#rs3.SLRFixDFA() # should fail and does.
# testing RULEGRAM
ObjG = NullCGrammar()
ObjG.Addterm("id","id",echo)
ObjG.Nonterms("T E Ep F Tp")
ObjG.Keywords("begin end")
ObjG.punct("+*()")
ObjG.comments(["--.*\n"])
# PROBLEM WITH COMMENTS???
Rulestr = """
## what a silly grammar!
T ::
@R One :: T >> begin E end
@R Three :: E >>
@R Two :: E >> E + T
@R Four :: E >> ( T )
"""
RL = RULEGRAM.DoParse1( Rulestr, ObjG )
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParser.py === (1200/1300 lines abridged)
#
# python for parser interpretation
# Copyright Aaron Robert Watters, 1994
#
# BUGS:
# Lexical error handling is not nice
# Parse error handling is not nice
#
# Lex analysis may be slow for big grammars
# Setting case sensitivity for keywords MUST happen BEFORE
# declaration of keywords.
import kjSet
import string
import regex
import regsub
# set this flag for regression testing at each load
RUNTESTS = 0
# set this flag to enable warning for default reductions
WARNONDEFAULTS = 0
# some local constants
TERMFLAG = -1 # FLAG FOR TERMINAL
NOMATCHFLAG = -2 # FLAG FOR NO MATCH IN FSM
MOVETOFLAG = -3 # FLAG FOR "SHIFT" IN SN FSM
REDUCEFLAG = -4 # FLAG FOR REDUCTION IN FSM
TRANSFLAG = -5 # FLAG FOR TRANSIENT STATE IN FSM
KEYFLAG = -6 # FLAG FOR KEYWORD
NONTERMFLAG = -7 # FLAG FOR NONTERMINAL
TERMFLAG = -8 # FLAG FOR TERMINAL (note: redefines the -1 value above; -8 is the one used)
EOFFLAG = "*" # FLAG for End of file
# set this string to the Module name (filename)
# used for dumping reconstructable objects
THISMODULE = "kjParser"
# regular expression for matching whitespace
WHITERE = "["+string.whitespace+"]+"
WHITEREGEX = regex.compile(WHITERE)
# local errors
LexTokenError = "LexTokenError" # may happen on bad string
UnkTermError = "UnkTermError" # ditto
BadPunctError= "BadPunctError" # if try to make whitespace a punct
ParseInitError = "ParseInitError" # shouldn't happen?
#EOFError # may happen on bad string
[-=- -=- -=- 1200 lines omitted -=- -=- -=-]
        for tokenindex in range(len(tokens)):
            (kind, name) = tokens[tokenindex]
            if kind == KEYFLAG:
                tokens[tokenindex] = LexD.keyword(name)
            elif not kind in [TERMFLAG, NONTERMFLAG]:
                raise FlowError, "unknown token type"
        # not needed
        self.tokens = tokens

    def MakeRules(self):
        Grammar = self.Gram
        Grammar.DFA.root_nonTerminal = self.Root
        NameIndex = Grammar.RuleNameToIndex
        RuleTuples = self.RuleTups
        nRules = len(RuleTuples)
        RuleList = [None] * nRules
        for index in range(nRules):
            (Name, Components) = RuleTuples[index]
            rule = apply(ParseRule, Components)
            rule.Name = Name
            RuleList[index] = rule
            NameIndex[Name] = index
        Grammar.RuleL = RuleList

    def MakeTransitions(self):
        Grammar = self.Gram
        DFA = Grammar.DFA
        StateTokenMap = DFA.StateTokenMap
        tokens = self.tokens
        # record the state number
        DFA.maxState = self.MaxStates
        # this is historical, unfortunately... CLEAN IT UP SOMEDAY!
        # THE DFA.States DICT IS NOT NEEDED (?) (here)
        for state in range(1, self.MaxStates+1):
            DFA.States[state] = [TRANSFLAG]
        # record the reductions
        for (fromState, TokenIndex, rulenum) in self.reducts:
            DFA.SetReduction(fromState, tokens[TokenIndex], rulenum)
        # record the transitions
        for (fromState, TokenIndex, ToState) in self.moveTos:
            DFA.SetMap(fromState, tokens[TokenIndex], ToState)

    def Cleanup(self):
        Grammar = self.Gram
        Grammar.CleanUp()
################# FOLLOWING CODE IS FOR REGRESSION TESTING ONLY
################# DELETE IT IF YOU WANT/NEED
#### All tests for this module deleted, since
#### ParseBuild module tests are sufficient.
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjSet.py ===
#
# sets implemented using mappings
# Copyright Aaron Robert Watters, 1994
#
# these only work for "immutable" elements.
# probably not terribly efficient, but easy to implement
# and not as slow as concievably possible.
def NewSet(Sequence):
    Result = {}
    for Elt in Sequence:
        Result[Elt] = 1
    return Result

def Empty(Set):
    if Set == {}:
        return 1
    else:
        return 0

def get_elts(Set):
    return Set.keys()

def member(Elt, Set):
    return Set.has_key(Elt)

# in place mutators:
# return 0 if no change, otherwise 1

def addMember(Elt, Set):
    change = 0
    if not Set.has_key(Elt):
        Set[Elt] = 1
        change = 1
    return change

def Augment(Set, OtherSet):
    change = 0
    for Elt in OtherSet.keys():
        if not Set.has_key(Elt):
            Set[Elt] = 1
            change = 1
    return change

def Mask(Set, OtherSet):
    change = 0
    for Elt in OtherSet.keys():
        if Set.has_key(Elt):
            del Set[Elt]
            change = 1
    return change

# side effect free functions

def Intersection(Set1, Set2):
    Result = {}
    for Elt in Set1.keys():
        if Set2.has_key(Elt):
            Result[Elt] = 1
    return Result

def Difference(Set1, Set2):
    Result = {}
    for Elt in Set1.keys():
        if not Set2.has_key(Elt):
            Result[Elt] = 1
    return Result

def Union(Set1, Set2):
    Result = {}
    Augment(Result, Set1)
    Augment(Result, Set2)
    return Result

def Subset(Set1, Set2):
    Result = 1
    for Elt in Set1.keys():
        if not Set2.has_key(Elt):
            Result = 0
            return Result  # nonlocal
    return Result

def Same(Set1, Set2):
    if Subset(Set1, Set2) and Subset(Set2, Set1):
        return 1
    else:
        return 0
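# Example usage (illustrative):
#   S1 = NewSet([1, 2]); S2 = NewSet([2, 3])
#   member(2, S1)                            # -> 1
#   Same(Intersection(S1, S2), NewSet([2]))  # -> 1
#   Union(S1, S2)                            # -> {1: 1, 2: 1, 3: 1}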
# directed graphs as Dictionaries of Sets
# also only works for immutable nodes

def NewDG(pairlist):
    Result = {}
    for (source, dest) in pairlist:
        AddArc(Result, source, dest)
    return Result

def GetPairs(Graph):
    result = []
    Sources = Graph.keys()
    for S in Sources:
        Dests = get_elts( Graph[S] )
        ThesePairs = [None] * len(Dests)
        for i in range(0, len(Dests)):
            D = Dests[i]
            ThesePairs[i] = (S, D)
        result = result + ThesePairs
    return result

def AddArc(Graph, Source, Dest):
    change = 0
    if Graph.has_key(Source):
        Adjacent = Graph[Source]
        if not member(Dest, Adjacent):
            addMember(Dest, Adjacent)
            change = 1
    else:
        Graph[Source] = NewSet( [ Dest ] )
        change = 1
    return change

def Neighbors(Graph, Source):
    if Graph.has_key(Source):
        return get_elts(Graph[Source])
    else:
        return []

def HasArc(Graph, Source, Dest):
    result = 0
    if Graph.has_key(Source) and member(Dest, Graph[Source]):
        result = 1
    return result

def Sources(Graph):
    return Graph.keys()

# when G1, G2 and G3 are different graphs this results in
#   G1 = G1 U ( G2 o G3 )
# If G1 is identical to one of G2, G3 the result is somewhat
# nondeterministic (depends on dictionary implementation).
# However, guaranteed that AddComposition(G,G,G) returns
#   G1 U (G1 o G1) <= G <= TC(G1)
# where G1 is G's original value and TC(G1) is its transitive closure
# hence this function can be used for brute force transitive closure
#
def AddComposition(G1, G2, G3):
    change = 0
    for G2Source in Sources(G2):
        for Middle in Neighbors(G2, G2Source):
            for G3Dest in Neighbors(G3, Middle):
                if not HasArc(G1, G2Source, G3Dest):
                    change = 1
                    AddArc(G1, G2Source, G3Dest)
    return change

# in place transitive closure of a graph
def TransClose(Graph):
    change = AddComposition(Graph, Graph, Graph)
    somechange = change
    while change:
        change = AddComposition(Graph, Graph, Graph)
        if not somechange:
            somechange = change
    return somechange
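# Example (illustrative): brute force reachability via transitive closure.
#   G = NewDG([(1, 2), (2, 3)])
#   TransClose(G)     # adds the composed arc 1 -> 3
#   HasArc(G, 1, 3)   # -> 1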
########### SQueue stuff
#
# A GrabBag should be used to hold objects temporarily for future
# use. You can put things in and take them out, with autodelete:
# that's all!
#
# make a new baggy with nothing in it
# BG[0] is the insert cursor, BG[1] is the delete cursor, others are elts
#
OLD = 1
NEW = 0
START = 2

def NewBG():
    B = [None] * 8  # default size
    B[OLD] = START
    B[NEW] = START
    return B

def BGempty(B):
    # other ops must maintain this: old == new iff empty
    return B[OLD] == B[NEW]

# may return a new, larger structure,
# so it must be used with assignment... B = BGadd(e, B)
def BGadd(elt, B):
    cursor = B[NEW]
    oldlen = len(B)
    # look for an available position
    while B[cursor] != None:
        cursor = cursor + 1
        if cursor >= oldlen: cursor = START
        if cursor == B[NEW]:  # back to beginning
            break
    # resize if wrapped
    if B[cursor] != None:
        B = B + [None] * oldlen
        cursor = oldlen
        B[OLD] = START
    if B[cursor] != None:
        raise IndexError, "can't insert?"
    # add the elt
    B[cursor] = (elt,)
    B[NEW] = cursor
    # B nonempty so OLD and NEW should differ.
    if B[OLD] == cursor:
        B[NEW] = cursor + 1
        if B[NEW] <= len(B): B[NEW] = START
    return B
def BGgetdel(B):
    # find something to delete:
    cursor = B[OLD]
    blen = len(B)
    while B[cursor] == None:
        cursor = cursor + 1
        if cursor >= blen: cursor = START
        if cursor == B[OLD]: break  # wrapped
    if B[cursor] == None:
        raise IndexError, "delete from empty grabbag(?)"
    # test to see if bag is empty (position cursor2 at a nonempty slot)
    cursor2 = cursor + 1
    if cursor2 >= blen: cursor2 = START
    while B[cursor2] == None:
        cursor2 = cursor2 + 1
        if cursor2 >= blen: cursor2 = START
        # since B[cursor] is not yet deleted, the while will terminate
    # get and delete the elt
    (result,) = B[cursor]
    B[cursor] = None
    # cursor == cursor2 iff bag is empty
    B[OLD] = cursor2
    if B[NEW] == cursor2: B[NEW] = cursor
    return result
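# Minimal illustration (retrieval order is an implementation detail):
#   B = NewBG()
#   B = BGadd('x', B)
#   BGgetdel(B)   # -> 'x'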
def BGtest(n):
    B = NewBG()
    rn = range(n)
    rn2 = range(n-2)
    for i in rn:
        for j in rn:
            B = BGadd( (i,j), B)
            B = BGadd( (j,i), B)
            x = BGgetdel(B)
        for j in rn2:
            y = BGgetdel(B)
        print (i, x, y)
    return B
=== Removed File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/Makefile ===