[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser - kjParseBuild.py:1.1.2.2 kjParser.py:1.1.2.2 kjSet.py:1.1.2.2
Andreas Jung
andreas@digicool.com
Thu, 7 Feb 2002 15:35:42 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser
In directory cvs.zope.org:/tmp/cvs-serv29733
Modified Files:
Tag: ajung-textindexng-branch
kjParseBuild.py kjParser.py kjSet.py
Log Message:
replaced the deprecated regex module with the re module (thanks to Stephan Richter)
(taken from the ZOQLMethod package)
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParseBuild.py 1.1.2.1 => 1.1.2.2 === (2397/2497 lines abridged)
+
# python code for building a parser from a grammar
# Copyright Aaron Robert Watters, 1994
#
@@ -14,12 +14,10 @@
import string
import kjSet
import kjParser
-import regex
# import some constants
-from kjParser import \
- TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, TRANSFLAG, KEYFLAG, \
- NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN
+from kjParser import TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, \
+ TRANSFLAG, KEYFLAG, NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN
PMODULE = kjParser.THISMODULE
@@ -41,121 +39,122 @@
#
class CFSMachine(kjParser.FSMachine):
- def __init__(self, nonterm):
- kjParser.FSMachine.__init__(self, nonterm)
+ def __init__(self, nonterm):
+ kjParser.FSMachine.__init__(self, nonterm)
- # return the epsilon closure of the FSM as a new FSM
- #
- # DoNullMap, if set, will map unexpected tokens to
- # the "empty" state (usually creating a really big fsm)
- #
- def Eclosure(self, Epsilon, DoNullMaps=0):
- Closure = CFSMachine( self.root_nonTerminal )
+
+ # return the epsilon closure of the FSM as a new FSM
+ #
+ # DoNullMap, if set, will map unexpected tokens to
+ # the "empty" state (usually creating a really big fsm)
+ #
+ def Eclosure(self, Epsilon, DoNullMaps=0):
+ Closure = CFSMachine( self.root_nonTerminal )
+
+ # compute the Epsilon Graph between states
+ EGraph = kjSet.NewDG([])
+ for State in range(0,self.maxState+1):
+ # every state is E-connected to self
+ kjSet.AddArc( EGraph, State, State )
+ # add possible transition on epsilon (ONLY ONE SUPPORTED!)
[-=- -=- -=- 2397 lines omitted -=- -=- -=-]
- L = kjParser.nonterminal("L")
- R = kjParser.nonterminal("R")
- RS1 = kjParser.ParseRule( S, [L, equals, R] )
- RS2 = kjParser.ParseRule( S, [R], echo )
- RL1 = kjParser.ParseRule( L, [star, R])
- RL2 = kjParser.ParseRule( L, [id])
- RR1 = kjParser.ParseRule( R, [L] )
- rs3 = ruleset(S, [RS1,RS2,RL1,RL2,RR1])
- rs3.CompFirst()
- rs3.CompFollow()
- rs3.CompSLRNFA()
- rs3.CompDFA()
- #rs3.SLRFixDFA() # should fail and does.
-
- # testing RULEGRAM
- ObjG = NullCGrammar()
- ObjG.Addterm("id","id",echo)
- ObjG.Nonterms("T E Ep F Tp")
- ObjG.Keywords("begin end")
- ObjG.punct("+*()")
- ObjG.comments(["--.*\n"])
- # PROBLEM WITH COMMENTS???
- Rulestr = """
- ## what a silly grammar!
- T ::
- @R One :: T >> begin E end
- @R Three :: E >>
- @R Two :: E >> E + T
- @R Four :: E >> ( T )
- """
- RL = RULEGRAM.DoParse1( Rulestr, ObjG )
+ # testing RULEGRAM
+ ObjG = NullCGrammar()
+ ObjG.Addterm("id","id",echo)
+ ObjG.Nonterms("T E Ep F Tp")
+ ObjG.Keywords("begin end")
+ ObjG.punct("+*()")
+ ObjG.comments(["--.*\n"])
+ # PROBLEM WITH COMMENTS???
+ Rulestr = """## what a silly grammar!
+ T ::
+ @R One :: T >> begin E end
+ @R Three :: E >>
+ @R Two :: E >> E + T
+ @R Four :: E >> ( T )
+ """
+ RL = RULEGRAM.DoParse1( Rulestr, ObjG )
+
+
+
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjParser.py 1.1.2.1 => 1.1.2.2 === (2061/2161 lines abridged)
import kjSet
import string
-import regex
-import regsub
-import string
+from string import joinfields, whitespace
+
+import re
+
# set this flag for regression testing at each load
RUNTESTS = 0
@@ -40,7 +41,8 @@
# regular expression for matching whitespace
WHITERE = "["+string.whitespace+"]+"
-WHITEREGEX = regex.compile(WHITERE)
+WHITEREGEX = re.compile(WHITERE)
+
# local errors
LexTokenError = "LexTokenError" # may happen on bad string
@@ -64,13 +66,15 @@
# utility function for error diagnostics
def DumpStringWindow(Str, Pos, Offset=15):
- L = []
- L.append("near ::")
- start = Pos-Offset
- end = Pos+Offset
- if start<0: start = 0
- if end>len(Str): end = len(Str)
- L.append(`Str[start:Pos]`+"*"+`Str[Pos:end]`)
+ L = [""]
+ lines = len(string.split(Str[:Pos], '\n'))+1
+ L.append("Syntax Error on line %i." %lines)
+ start = Pos - Offset
+ end = Pos + Offset
+ if start < 0: start = 0
+ if end > len(Str): end = len(Str)
+ L.append(`Str[start:end]`)
+ L.append(' '*(Pos-start+1)+'^')
from string import join
return join(L, "\n")
@@ -137,403 +141,433 @@
#
class LexDictionary:
- def __init__(self):
[-=- -=- -=- 2061 lines omitted -=- -=- -=-]
+ LexTokens = {}
+ tokens = self.tokens
+ for tokenindex in range(len(tokens)):
+ (kind,name) = tokens[tokenindex]
+ if kind == KEYFLAG:
+ tokens[tokenindex] = LexD.keyword(name)
+ elif not kind in [TERMFLAG, NONTERMFLAG]:
+ raise FlowError, "unknown token type"
+ # not needed
+ self.tokens = tokens
+
+
+ def MakeRules(self):
+ Grammar = self.Gram
+ Grammar.DFA.root_nonTerminal = self.Root
+ NameIndex = Grammar.RuleNameToIndex
+ RuleTuples = self.RuleTups
+ nRules = len(RuleTuples)
+ RuleList = [None] * nRules
+ for index in range(nRules):
+ (Name, Components) = RuleTuples[index]
+ rule = apply(ParseRule, Components)
+ rule.Name = Name
+ RuleList[index] = rule
+ NameIndex[Name] = index
+ Grammar.RuleL = RuleList
+
+
+ def MakeTransitions(self):
+ Grammar = self.Gram
+ DFA = Grammar.DFA
+ StateTokenMap = DFA.StateTokenMap
+ tokens = self.tokens
+ # record the state number
+ DFA.maxState = self.MaxStates
+ # this is historical, unfortunately... CLEAN IT UP SOMEDAY!
+ # THE DFA.States DICT IS NOT NEEDED (?) (here)
+ for state in range(1, self.MaxStates+1):
+ DFA.States[state] = [TRANSFLAG]
+ # record the reductions
+ for (fromState, TokenIndex, rulenum) in self.reducts:
+ DFA.SetReduction(fromState, tokens[TokenIndex], rulenum)
+ # record the transitions
+ for (fromState, TokenIndex, ToState) in self.moveTos:
+ DFA.SetMap(fromState, tokens[TokenIndex], ToState)
+
+
+ def Cleanup(self):
+ Grammar = self.Gram
+ Grammar.CleanUp()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/kjSet.py 1.1.2.1 => 1.1.2.2 ===
-#
# sets implemented using mappings
# Copyright Aaron Robert Watters, 1994
#
@@ -13,34 +11,39 @@
Result[Elt] = 1
return Result
+
def Empty(Set):
if Set == {}:
- return 1
+ return 1
else:
- return 0
+ return 0
+
def get_elts(Set):
return Set.keys()
+
def member(Elt,Set):
return Set.has_key(Elt)
+
# in place mutators:
# returns if no change otherwise 1
def addMember(Elt,Set):
change = 0
if not Set.has_key(Elt):
- Set[Elt] = 1
- change = 1
+ Set[Elt] = 1
+ change = 1
return change
+
def Augment(Set, OtherSet):
change = 0
for Elt in OtherSet.keys():
if not Set.has_key(Elt):
- Set[Elt] = 1
- change = 1
+ Set[Elt] = 1
+ change = 1
return change
@@ -48,45 +51,51 @@
change = 0
for Elt in OtherSet.keys():
if Set.has_key(Elt):
- del Set[Elt]
- change = 1
+ del Set[Elt]
+ change = 1
return change
+
# side effect free functions
def Intersection(Set1, Set2):
Result = {}
for Elt in Set1.keys():
if Set2.has_key(Elt):
- Result[Elt] = 1
+ Result[Elt] = 1
return Result
+
def Difference(Set1, Set2):
Result = {}
for Elt in Set1.keys():
if not Set2.has_key(Elt):
- Result[Elt] = 1
+ Result[Elt] = 1
return Result
+
def Union(Set1,Set2):
Result = {}
Augment(Result,Set1)
Augment(Result,Set2)
return Result
+
def Subset(Set1,Set2):
Result = 1
for Elt in Set1.keys():
if not Set2.has_key(Elt):
- Result = 0
- return Result # nonlocal
+ Result = 0
+ return Result # nonlocal
return Result
+
def Same(Set1,Set2):
if Subset(Set1,Set2) and Subset(Set2,Set1):
- return 1
+ return 1
else:
- return 0
+ return 0
+
# directed graphs as Dictionaries of Sets
# also only works for immutable nodes
@@ -109,6 +118,7 @@
result = result + ThesePairs
return result
+
def AddArc(Graph, Source, Dest):
change = 0
if Graph.has_key(Source):
@@ -121,21 +131,25 @@
change = 1
return change
+
def Neighbors(Graph,Source):
if Graph.has_key(Source):
- return get_elts(Graph[Source])
+ return get_elts(Graph[Source])
else:
- return []
+ return []
+
def HasArc(Graph, Source, Dest):
result = 0
if Graph.has_key(Source) and member(Dest, Graph[Source]):
- result = 1
+ result = 1
return result
+
def Sources(Graph):
return Graph.keys()
+
# when G1, G2 and G3 are different graphs this results in
# G1 = G1 U ( G2 o G3 )
# If G1 is identical to one of G2,G3 the result is somewhat
@@ -151,20 +165,22 @@
for Middle in Neighbors(G2,G2Source):
for G3Dest in Neighbors(G3, Middle):
if not HasArc(G1, G2Source, G3Dest):
- change = 1
- AddArc(G1, G2Source, G3Dest)
+ change = 1
+ AddArc(G1, G2Source, G3Dest)
return change
+
# in place transitive closure of a graph
def TransClose(Graph):
change = AddComposition(Graph, Graph, Graph)
somechange = change
while change:
- change = AddComposition(Graph, Graph, Graph)
- if not somechange:
- somechange = change
+ change = AddComposition(Graph, Graph, Graph)
+ if not somechange:
+ somechange = change
return somechange
+
########### SQueue stuff
#
# A GrabBag should be used to hold objects temporarily for future
@@ -183,10 +199,12 @@
B[NEW] = START
return B
+
def BGempty(B):
# other ops must maintain this: old == new iff empty
return B[OLD] == B[NEW]
+
# may return new, larger structure
# must be used with assignment... B = BGadd(e,B)
def BGadd(elt, B):
@@ -194,43 +212,51 @@
oldlen = len(B)
# look for an available position
while B[cursor] != None:
- cursor = cursor+1
- if cursor >= oldlen: cursor = START
- if cursor == B[NEW]: #back to beginning
- break
+ cursor = cursor+1
+ if cursor >= oldlen: cursor = START
+ if cursor == B[NEW]: #back to beginning
+ break
+
# resize if wrapped
if B[cursor] != None:
- B = B + [None] * oldlen
- cursor = oldlen
- B[OLD] = START
+ B = B + [None] * oldlen
+ cursor = oldlen
+ B[OLD] = START
+
if B[cursor] != None:
- raise IndexError, "can't insert?"
+ raise IndexError, "can't insert?"
+
# add the elt
B[cursor] = (elt,)
B[NEW] = cursor
+
# B nonempty so OLD and NEW should differ.
if B[OLD] == cursor:
- B[NEW] = cursor + 1
- if B[NEW]<=len(B): B[NEW] = START
+ B[NEW] = cursor + 1
+ if B[NEW]<=len(B): B[NEW] = START
return B
+
def BGgetdel(B):
# find something to delete:
cursor = B[OLD]
blen = len(B)
while B[cursor]==None:
- cursor = cursor+1
- if cursor>=blen: cursor = START
- if cursor == B[OLD]: break # wrapped
+ cursor = cursor+1
+ if cursor>=blen: cursor = START
+ if cursor == B[OLD]: break # wrapped
+
if B[cursor] == None:
- raise IndexError, "delete from empty grabbag(?)"
+ raise IndexError, "delete from empty grabbag(?)"
# test to see if bag is empty (position cursor2 at nonempty slot)
cursor2 = cursor+1
if cursor2>=blen: cursor2 = START
+
while B[cursor2]==None:
- cursor2 = cursor2+1
- if cursor2>=blen: cursor2 = START
- # since B[cursor] not yet deleted while will terminate
+ cursor2 = cursor2+1
+ if cursor2>=blen: cursor2 = START
+ # since B[cursor] not yet deleted while will terminate
+
# get and delete the elt
(result,) = B[cursor]
B[cursor] = None
@@ -238,6 +264,7 @@
B[OLD] = cursor2
if B[NEW] == cursor2: B[NEW] = cursor
return result
+
def BGtest(n):
B = NewBG()