[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2 - LICENSE:1.1.2.1 Makefile:1.1.2.1 NOTES:1.1.2.1 yapps2.py:1.1.2.1 yapps2.tex:1.1.2.1 yappsrt.py:1.1.2.1

Andreas Jung andreas@digicool.com
Fri, 11 Jan 2002 13:21:30 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2
In directory cvs.zope.org:/tmp/cvs-serv12830/queryparser/yapps2

Added Files:
      Tag: ajung-textindexng-branch
	LICENSE Makefile NOTES yapps2.py yapps2.tex yappsrt.py 
Log Message:
Added a preliminary new parser for TextIndex queries.


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/LICENSE ===
Permission is hereby granted, free of charge, to any person obtaining 
a copy of this software and associated documentation files (the 
"Software"), to deal in the Software without restriction, including 
without limitation the rights to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell copies of the Software, and to 
permit persons to whom the Software is furnished to do so, subject to 
the following conditions: 
  
The above copyright notice and this permission notice shall be included 
in all copies or substantial portions of the Software. 
  
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/Makefile ===
# Build every generated parser: one .py per grammar (.g) file.
# $(wildcard)/$(patsubst) replaces the old `$(shell echo *.g | sed s/.g/.py/g)`,
# which forked a shell, left a literal "*.g" when no grammar existed, and used
# an unescaped `.` in the sed pattern (so it could rewrite the wrong chars).
ALL: $(patsubst %.g,%.py,$(wildcard *.g examples/*.g))

# Regenerate a parser whenever its grammar, the generator, the runtime,
# or this Makefile changes.  yapps2.py writes <stem>.py next to <stem>.g.
%.py: %.g yapps2.py yappsrt.py Makefile
	python yapps2.py $<

DOC:	yapps2.ps

yapps2.ps: yapps2.dvi
	dvips -q $< -o $@

yapps2.dvi: yapps2.tex
	latex $<

# Package and upload a release (network I/O: scp to the maintainer's host).
DISTRIB: ALL
	zip -u yapps2.zip LICENSE yapps2.py yappsrt.py parsedesc.g examples/*.g NOTES yapps2.tex yapps2.ps Makefile
	scp yapps2.zip iswim.stanford.edu:www/yapps2.zip

# These targets name actions, not files; declare them so a stray file
# named ALL/DOC/DISTRIB cannot shadow them.
.PHONY: ALL DOC DISTRIB



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/NOTES ===
Debugging mode
Output to English?
Optimize unused variables
Optimize flag that would use global lists & dicts (for set membership)


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/yapps2.py === (674/774 lines abridged)
# Yapps 2.0 - yet another python parser system
# Amit J Patel, January 1999
# See http://theory.stanford.edu/~amitp/Yapps/ for documentation and updates

# v2.0.1 changes (October 2001):
# * The exceptions inherit the standard Exception class (thanks Rich Salz)
# * The scanner can use either a different set of regular expressions
#   per instance, or allows the subclass to define class fields with
#   the patterns.  This improves performance when many Scanner objects
#   are being created, because the regular expressions don't have to
#   be recompiled each time. (thanks Amaury Forgeot d'Arc)

from string import *
from yappsrt import *
import regex, re

INDENT = " "*4

class Generator:
    def __init__(self, name, options, tokens, rules):
        """Collect a grammar description into the generator's lookup tables.

        name    -- name of the parser to generate
        options -- mapping of option flags controlling code generation
        tokens  -- list of (token name, regexp) pairs; the special name
                   '#ignore' registers a token keyed by its regexp text
                   that the scanner should skip
        rules   -- list of (rule name, parameters, parse node) triples
        """
        self.change_count = 0
        self.name = name
        self.options = options
        self.preparser = ''
        self.postparser = None
        
        self.tokens = {} # Map from tokens to regexps
        self.ignore = [] # List of token names to ignore in parsing
        self.terminals = [] # List of token names (to maintain ordering)
        for n,t in tokens:
            if n == '#ignore':
                # Ignored tokens use their regexp text as their name.
                n = t
                self.ignore.append(n)
            if n in self.tokens.keys() and self.tokens[n] != t:
                # Same name bound to two different regexps: warn, last one wins.
                print 'Warning: token', n, 'multiply defined.'
            self.tokens[n] = t
            self.terminals.append(n)
            
        self.rules = {} # Map from rule names to parser nodes
        self.params = {} # Map from rule names to parameters
        self.goals = [] # List of rule names (to maintain ordering)
        for n,p,r in rules:
            self.params[n] = p
            self.rules[n] = r
            self.goals.append(n)
            
        import sys
        # Generated code goes to stdout unless the caller reassigns .output.
        self.output = sys.stdout

    def __getitem__(self, name):

[-=- -=- -=- 674 lines omitted -=- -=- -=-]

    p = ParserDescription(ParserDescriptionScanner(s))
    if not p: return
    
    # Now parse the file
    t = wrap_error_reporter(p, 'Parser')
    if not t: return # Error
    if preparser is not None: t.preparser = preparser
    if postparser is not None: t.postparser = postparser

    # Check the options
    for f in t.options.keys():
        for opt,_,_ in yapps_options:
            if f == opt: break
        else:
            print 'Warning: unrecognized option', f
    # Add command line options to the set
    for f in flags.keys(): t.options[f] = flags[f]
            
    # Generate the output
    if dump:
        t.dump_information()
    else:
        t.output = open(outputfilename, 'w')
        t.generate_output()

if __name__=='__main__':
    import sys, getopt
    # Command line: python yapps2.py [flags] input.g [output.py]
    # -f<flag> options come from yapps_options; --dump prints grammar info.
    optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump'])
    if not args or len(args) > 2:
        print 'Usage:'
        print '  python', sys.argv[0], '[flags] input.g [output.py]'
        print 'Flags:'
        print ('  --dump' + ' '*40)[:35] + 'Dump out grammar information'
        for flag, _, doc in yapps_options:
            print ('  -f' + flag + ' '*40)[:35] + doc
    else:
        # Read in the options and create a list of flags
        # NOTE(review): the block below mixes tab and space indentation;
        # it only parses where a tab equals 8 columns (fails under -tt).
	flags = {}
	for opt in optlist:
	    for flag, name, _ in yapps_options:
		if opt == ('-f', flag):
		    flags[name] = 1
		    break
	    else:
                if opt == ('--dump', ''):
                    flags['dump'] = 1
		else:
                    print 'Warning - unrecognized option:  ', opt[0], opt[1]

        apply(generate, tuple(args), flags)  # flags become keyword args


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/yapps2.tex === (1140/1240 lines abridged)
\documentclass[10pt]{article}
\usepackage{palatino}
%\usepackage{html}
\newcommand{\htmladdnormallink}[2]{#1 $\langle$#2$\rangle$}
\usepackage{color}
\usepackage{xspace}

\setlength{\headsep}{0in}
\setlength{\headheight}{0in}
\setlength{\textheight}{8.5in}
\setlength{\textwidth}{5.9in}
\setlength{\oddsidemargin}{0.25in}

\definecolor{darkblue}{rgb}{0,0,0.6}
\definecolor{darkerblue}{rgb}{0,0,0.3}

\newcommand{\mysection}[1]{\section{\textcolor{darkblue}{\textsf{#1}}}}
\newcommand{\mysubsection}[1]{\subsection{\textcolor{darkerblue}{\textsf{#1}}}}

%\bodytext{bgcolor=white text=black link=#004080 vlink=#006020}

\newcommand{\first}{\textsc{first}\xspace}
\newcommand{\follow}{\textsc{follow}\xspace}

\begin{document}

\begin{center}
\hfill \begin{tabular}{c}
{\Large The \emph{Yapps} Parser Generator System}\\
\verb|http://theory.stanford.edu/~amitp/Yapps/|\\
                Version 2.0\\
\\
Amit J. Patel\\
\htmladdnormallink{}
{http://www-cs-students.stanford.edu/{\~{}}amitp/}

\end{tabular} \hfill \rule{0in}{0in}
\end{center}

\mysection{Introduction}

\emph{Yapps} (\underline{Y}et \underline{A}nother \underline{P}ython
\underline{P}arser \underline{S}ystem) is an easy to use parser
generator that is written in Python and generates Python code.  There
are several parser generator systems already available for Python,
including \texttt{PyLR, kjParsing, PyBison,} and \texttt{mcf.pars,}
but I had different goals for my parser.  Yapps is simple, is easy to
use, and produces human-readable parsers.  It is not the fastest or
most powerful parser.  Yapps is designed to be used when regular
expressions are not enough and other parser systems are too much:

[-=- -=- -=- 1140 lines omitted -=- -=- -=-]

   somewhat ugly.
 \item It would be nice to control choices with user-defined
   predicates.
 \item The most likely future extension is backtracking.  A grammar
   pattern like \verb|(VAR ':=' expr)? {{ return Assign(VAR,expr) }} : expr {{ return expr }}| would turn into code that attempted to
   match \verb|VAR ':=' expr|.  If it succeeded, it would run \verb|{{ return ... }}|.  If it failed, it would match \verb|expr {{ return expr }}|.  Backtracking may make it less necessary to
   write LL(2) grammars.
 \item The SRE module used in Python 1.6+ includes an undocumented
   feature that allows one to figure out which of many alternatives
   matched in a regular expression.  In Yapps 2, each token is checked
   separately.  With this feature of SRE, it's possible to construct a
   single regular expression that scans all possible tokens
   simultaneously.  However, there is a difficulty with generating
   this regular expression automatically.  Unlike the ``normal'' rules
   for regexp matches, alternatives are \emph{not} matched using the
   ``longest match'' rule.  Thus,
   \verb:re.match(r'foo|foobar','foobar').group(0): returns
   \verb:'foo':, but
   \verb:re.match(r'foobar|foo','foobar').group(0): returns
   \verb:'foobar':! This means that we have to sort the tokens so that
   the longest ones occur first.  However, this isn't easy to do.
\end{enumerate}

\mysection{References}

\begin{enumerate}
  \item ANTLR/PCCTS, by Terrence Parr, is available at\\
  \htmladdnormallink{The ANTLR Home Page}{http://www.antlr.org/}.
  
  \item PyLR, by Scott Cotton, is at \\
    \htmladdnormallink{his Starship
    page}{http://starship.skyport.net/crew/scott/PyLR.html}.
  
  \item John Aycock's \htmladdnormallink{Compiling Little Languages
      Framework}{http://www.csr.UVic.CA/{\~{}}aycock/python/}.

  \item PyBison, by Scott Hassan, can be found at\\
  \htmladdnormallink{his Python Projects
    page}{http://coho.stanford.edu/{\~{}}hassan/Python/}.
  
  \item mcf.pars, by Mike C. Fletcher, is available at\\
  \htmladdnormallink{his web
    page}{http://www.golden.net/{\~{}}mcfletch/programming/}.
  
  \item kwParsing, by Aaron Watters, is available at \\
  \htmladdnormallink{his Starship
    page}{http://starship.skyport.net/crew/aaron\_watters/kwParsing/}.
\end{enumerate}

\end{document}


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/queryparser/yapps2/yappsrt.py ===
# Yapps 2.0 Runtime
#
# This module is needed to run generated parsers.

from string import *
import exceptions
import re

class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use"""
    # NOTE(review): this class shadows Python's built-in SyntaxError within
    # this module (and in anything doing "from yappsrt import *").
    def __init__(self, pos=-1, msg="Bad Token"):
	# pos is a character offset into the scanned input; -1 means unknown.
	self.pos = pos
	self.msg = msg
    def __repr__(self):
	if self.pos < 0: return "#<syntax-error>"
	else: return "SyntaxError[@ char " + `self.pos` + ": " + self.msg + "]"

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens"""
    # Raised by Scanner.token() when the requested token index is past the
    # end of the input and no further token can be scanned.
    pass

class Scanner:
    def __init__(self, patterns, ignore, input):
	"""Patterns is [(terminal,regex)...]
        Ignore is [terminal,...];
	Input is a string"""
	self.tokens = []
	self.restrictions = []
	self.input = input
	self.pos = 0
	self.ignore = ignore
	# The stored patterns are a pair (compiled regex,source
	# regex).  If the patterns variable passed in to the
	# constructor is None, we assume that the class already has a
	# proper .patterns list constructed
        if patterns is not None:
            self.patterns = []
            for k,r in patterns:
                self.patterns.append( (k, re.compile(r)) )
	
    def token(self, i, restrict=0):
	"""Get the i'th token, and if i is one past the end, then scan 
	for another token; restrict is a list of tokens that
	are allowed, or 0 for any token."""
	if i == len(self.tokens): self.scan(restrict)
	if i < len(self.tokens):
	    # Make sure the restriction is more restricted
	    if restrict and self.restrictions[i]:
		for r in restrict:
		    if r not in self.restrictions[i]:
			raise "Unimplemented: restriction set changed"
	    return self.tokens[i]
	raise NoMoreTokens()
    
    def __repr__(self):
	"""Print the last 10 tokens that have been scanned in"""
	output = ''
	for t in self.tokens[-10:]:
	    output = '%s\n  (@%s)  %s  =  %s' % (output,t[0],t[2],`t[3]`)
	return output
    
    def scan(self, restrict):
	"""Should scan another token and add it to the list, self.tokens,
	and add the restriction to self.restrictions"""
	# Keep looking for a token, ignoring any in self.ignore
	while 1:
	    # Search the patterns for the longest match, with earlier
	    # tokens in the list having preference
	    best_match = -1
	    best_pat = '(error)'
	    for p, regexp in self.patterns:
		# First check to see if we're ignoring this token
		if restrict and p not in restrict and p not in self.ignore: 
		    continue
		m = regexp.match(self.input, self.pos)
		if m and len(m.group(0)) > best_match:
		    # We got a match that's better than the previous one
		    best_pat = p
		    best_match = len(m.group(0))
		    
	    # If we didn't find anything, raise an error
	    if best_pat == '(error)' and best_match < 0:
		msg = "Bad Token"
		if restrict:
		    msg = "Trying to find one of "+join(restrict,", ")
		raise SyntaxError(self.pos, msg)

	    # If we found something that isn't to be ignored, return it
	    if best_pat not in self.ignore:
		# Create a token with this data
		token = (self.pos, self.pos+best_match, best_pat,
			 self.input[self.pos:self.pos+best_match])
		self.pos = self.pos + best_match
		# Only add this token if it's not in the list
		# (to prevent looping)
		if not self.tokens or token != self.tokens[-1]:
		    self.tokens.append(token)
		    self.restrictions.append(restrict)
		return
	    else:
		# This token should be ignored ..
		self.pos = self.pos + best_match

class Parser:
    """Base class for generated parsers: walks a Scanner's token stream,
    tracking the current position in self._pos."""

    def __init__(self, scanner):
        self._scanner = scanner
        self._pos = 0

    def _peek(self, *types):
        """Look ahead without consuming: return the terminal name of the
        current token.  Any arguments given form the set of terminal
        names allowed at this point."""
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Consume the current token, which must be of the given terminal
        type, and return its matched text; advances the position."""
        token = self._scanner.token(self._pos, [type])
        if token[2] != type:
            raise SyntaxError(token[0], 'Trying to find '+type)
        self._pos += 1
        return token[3]



def print_error(input, err, scanner):
    """This is a really dumb long function to print error messages nicely.

    input   -- the full text that was being parsed
    err     -- error object carrying .pos (char offset) and .msg
    scanner -- printed at the end (via its repr) to show nearby tokens
    """
    # count/rfind/find come from the string module (star-imported above).
    p = err.pos
    # Figure out the line number
    line = count(input[:p], '\n')
    print err.msg+" on line "+`line+1`+":"
    # Now try printing part of the line: a window of 80 chars each side
    text = input[max(p-80,0):p+80]
    p = p - max(p-80,0)

    # Strip to the left: cut at the nearest \n or \r before the error
    i = rfind(text[:p],'\n')
    j = rfind(text[:p],'\r')
    if i < 0 or (j < i and j >= 0): i = j
    if i >= 0 and i < p: 
	p = p - i - 1
	text = text[i+1:]

    # Strip to the right: cut at the nearest \n or \r after the error
    i = find(text,'\n',p)
    j = find(text,'\r',p)
    if i < 0 or (j < i and j >= 0): i = j
    if i >= 0: 
	text = text[:i]

    # Now shorten the text until it fits on one printed line
    while len(text) > 70 and p > 60:
	# Cut off 10 chars, replaced by "..." (net shrink of 7)
	text = "..." + text[10:]
	p = p - 7

    # Now print the string, along with an indicator
    print '> ',text
    print '> ',' '*p + '^'
    print 'List of nearby tokens:', scanner

def wrap_error_reporter(parser, rule):
    """Invoke parser.<rule>() and pretty-print any parse failure.

    Returns whatever the rule returns, or falls through (returning None)
    after reporting a SyntaxError or NoMoreTokens to stdout."""
    try: return getattr(parser, rule)()
    except SyntaxError, s:
        input = parser._scanner.input
        try:
            print_error(input, s, parser._scanner)
        except ImportError:
            # NOTE(review): catching ImportError here looks wrong --
            # print_error does no importing; presumably this was meant as
            # a fallback when print_error itself fails.  Confirm intent
            # before changing.
            print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n')
    except NoMoreTokens:
        print 'Could not complete parsing; stopped around here:'
        print parser._scanner