[ZPT] CVS: Packages/TAL - HTMLTALParser.py:1.19 README.txt:1.5 TALGenerator.py:1.22
guido@digicool.com
guido@digicool.com
Wed, 21 Mar 2001 17:49:37 -0500 (EST)
Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv18168
Modified Files:
HTMLTALParser.py README.txt TALGenerator.py
Log Message:
Pile of major changes -- the tests should all succeed again now:
- NestingError derives from HTMLParseError, and is hence simplified.
- Thread the input position through all the code generation routines;
  all compile-time exceptions now possess lineno and offset.
- Restructured the code that inserts implied end-tags, and made it
  generate output that is the same as the input more often. This was
  the hardest to get right, and I expect to be working more on it.
--- Updated File HTMLTALParser.py in package Packages/TAL --
--- HTMLTALParser.py 2001/03/19 22:57:07 1.18
+++ HTMLTALParser.py 2001/03/21 22:49:37 1.19
@@ -91,7 +91,7 @@
from TALGenerator import TALGenerator
from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
BOOLEAN_HTML_ATTRS = [
# List of Boolean attributes in HTML that may be given in
@@ -135,20 +135,14 @@
+ BLOCK_CLOSING_TAG_MAP.keys())
-class NestingError(Exception):
+class NestingError(HTMLParseError):
"""Exception raised when elements aren't properly nested."""
- def __init__(self, tag, lineno, offset):
+ def __init__(self, tag, position=(None, None)):
self.tag = tag
- self.lineno = lineno
- self.offset = offset
+ HTMLParseError.__init__(self, "unmatched </%s>" % tag, position)
- def __str__(self):
- s = "line %d, offset %d: unmatched </%s>" % (
- self.lineno, self.offset, self.tag)
- return s
-
class HTMLTALParser(HTMLParser):
# External API
@@ -156,7 +150,7 @@
def __init__(self, gen=None):
HTMLParser.__init__(self)
if gen is None:
- gen = TALGenerator()
+ gen = TALGenerator(xml=0)
self.gen = gen
self.tagstack = []
self.nsstack = []
@@ -172,7 +166,7 @@
self.feed(data)
self.close()
while self.tagstack:
- self.finish_endtag(self.tagstack[-1])
+ self.implied_endtag(self.tagstack[-1], 2)
assert self.nsstack == [], self.nsstack
assert self.nsdict == {}, self.nsdict
@@ -182,12 +176,43 @@
# Overriding HTMLParser methods
def finish_starttag(self, tag, attrs):
+ self.close_para_tags(tag)
+ self.tagstack.append(tag)
+ self.scan_xmlns(attrs)
+ attrlist, taldict, metaldict = self.extract_attrs(attrs)
+ self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+ self.getpos())
+ if tag in EMPTY_HTML_TAGS:
+ self.implied_endtag(tag, -1)
+
+ def finish_startendtag(self, tag, attrs):
+ self.close_para_tags(tag)
self.scan_xmlns(attrs)
+ attrlist, taldict, metaldict = self.extract_attrs(attrs)
+ if taldict.get("replace") or taldict.get("content"):
+ self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+ self.getpos())
+ self.gen.emitEndElement(tag)
+ else:
+ self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
+ self.getpos(), isend=1)
+ self.pop_xmlns()
+
+ def finish_endtag(self, tag):
if tag in EMPTY_HTML_TAGS:
- self.pop_xmlns()
- elif BLOCK_CLOSING_TAG_MAP.has_key(tag):
+ # </img> etc. in the source is an error
+ raise NestingError(tag, self.getpos())
+ self.close_enclosed_tags(tag)
+ self.gen.emitEndElement(tag)
+ self.pop_xmlns()
+ self.tagstack.pop()
+
+ def close_para_tags(self, tag):
+ if tag in EMPTY_HTML_TAGS:
+ return
+ close_to = -1
+ if BLOCK_CLOSING_TAG_MAP.has_key(tag):
blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
- close_to = -1
for i in range(len(self.tagstack)):
t = self.tagstack[i]
if t in blocks_to_close:
@@ -195,51 +220,39 @@
close_to = i
elif t in BLOCK_LEVEL_HTML_TAGS:
close_to = -1
- self.close_to_level(close_to)
- self.tagstack.append(tag)
elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS:
- close_to = -1
for i in range(len(self.tagstack)):
if self.tagstack[i] in BLOCK_LEVEL_HTML_TAGS:
close_to = -1
elif self.tagstack[i] in PARA_LEVEL_HTML_TAGS:
if close_to == -1:
close_to = i
- self.close_to_level(close_to)
- self.tagstack.append(tag)
- else:
- self.tagstack.append(tag)
- attrlist, taldict, metaldict = self.extract_attrs(attrs)
- self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
- self.getpos())
+ if close_to >= 0:
+ while len(self.tagstack) > close_to:
+ self.implied_endtag(self.tagstack[-1], 1)
- def finish_endtag(self, tag, implied=0):
- if tag in EMPTY_HTML_TAGS:
- return
+ def close_enclosed_tags(self, tag):
if tag not in self.tagstack:
- lineno, offset = self.getpos()
- raise NestingError(tag, lineno, offset)
- while self.tagstack[-1] != tag:
- self.finish_endtag(self.tagstack[-1], implied=1)
+ raise NestingError(tag, self.getpos())
+ while tag != self.tagstack[-1]:
+ self.implied_endtag(self.tagstack[-1], 1)
+ assert self.tagstack[-1] == tag
+
+ def implied_endtag(self, tag, implied):
+ assert tag == self.tagstack[-1]
+ assert implied in (-1, 1, 2)
+ if implied > 0:
+ if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
+ # Pick out trailing whitespace from the program, and
+ # insert the close tag before the whitespace.
+ white = self.gen.unEmitWhitespace()
+ self.gen.emitEndElement(tag)
+ if white:
+ self.gen.emitRawText(white)
+ else:
+ self.gen.emitEndElement(tag)
self.tagstack.pop()
self.pop_xmlns()
- if implied \
- and tag in TIGHTEN_IMPLICIT_CLOSE_TAGS \
- and self.gen.program \
- and self.gen.program[-1][0] == "rawtext":
- # Pick out trailing whitespace from the last instruction,
- # if it was a "rawtext" instruction, and insert the close
- # tag before the whitespace.
- data = self.gen.program.pop()[1]
- prefix = string.rstrip(data)
- white = data[len(prefix):]
- if data:
- self.gen.emitRawText(prefix)
- self.gen.emitEndElement(tag)
- if white:
- self.gen.emitRawText(white)
- else:
- self.gen.emitEndElement(tag)
def handle_charref(self, name):
self.gen.emitRawText("&#%s;" % name)
@@ -301,10 +314,3 @@
taldict[suffix] = value
attrlist.append(item)
return attrlist, taldict, metaldict
-
- def close_to_level(self, close_to):
- if close_to > -1:
- closing = self.tagstack[close_to:]
- closing.reverse()
- for t in closing:
- self.finish_endtag(t, implied=1)
--- Updated File README.txt in package Packages/TAL --
--- README.txt 2001/03/20 23:05:12 1.4
+++ README.txt 2001/03/21 22:49:37 1.5
@@ -80,19 +80,11 @@
- Bring DummyEngine.py up to specs.
-- Disallow TAL on start-tags whose end-tag is implied.
-
-- The call to emitSubstitution() in emitEndElement() for "replace"
- doesn't pass in anything for attrDict.
-
- Finish implementing insertStructure(): attribute replacement isn't
implemented yet.
- TALInterpreter currently always uses an XML parser to parse inserted
structure; it should use a parser appropriate to the mode.
-
-- Incorporate line number and offset information into remaining
- compile-time exceptions.
- HTMLTALParser.py and TALParser.py are silly names. Should be
HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened,
--- Updated File TALGenerator.py in package Packages/TAL --
--- TALGenerator.py 2001/03/17 04:06:53 1.21
+++ TALGenerator.py 2001/03/21 22:49:37 1.22
@@ -99,7 +99,7 @@
class TALGenerator:
- def __init__(self, expressionCompiler=None):
+ def __init__(self, expressionCompiler=None, xml=1):
if not expressionCompiler:
expressionCompiler = DummyCompiler()
self.expressionCompiler = expressionCompiler
@@ -109,6 +109,7 @@
self.macros = {}
self.slots = {}
self.slotStack = []
+ self.xml = xml
def getCode(self):
return self.optimize(self.program), self.macros
@@ -186,11 +187,15 @@
def emit(self, *instruction):
self.program.append(instruction)
- def emitStartTag(self, name, attrlist):
- self.program.append(("startTag", name, attrlist))
+ def emitStartTag(self, name, attrlist, isend=0):
+ if isend:
+ opcode = "startEndTag"
+ else:
+ opcode = "startTag"
+ self.program.append((opcode, name, attrlist))
def emitEndTag(self, name):
- if self.program and self.program[-1][0] == "startTag":
+ if self.xml and self.program and self.program[-1][0] == "startTag":
# Minimize empty element
self.program[-1] = ("startEndTag",) + self.program[-1][1:]
else:
@@ -207,8 +212,7 @@
m = re.match(
r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part)
if not m:
- raise TALError("invalid define syntax: " + `part`,
- position)
+ raise TALError("invalid define syntax: " + `part`, position)
scope, name, expr = m.group(1, 2, 3)
scope = scope or "local"
cexpr = self.compileExpression(expr)
@@ -222,19 +226,19 @@
program = self.popProgram()
self.emit("condition", cexpr, program)
- def emitRepeat(self, arg):
+ def emitRepeat(self, arg, position=(None, None)):
m = re.match("(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg)
if not m:
- raise TALError("invalid repeat syntax: " + `repeat`)
+ raise TALError("invalid repeat syntax: " + `repeat`, position)
name, expr = m.group(1, 2)
cexpr = self.compileExpression(expr)
program = self.popProgram()
self.emit("loop", name, cexpr, program)
- def emitSubstitution(self, arg, attrDict={}):
+ def emitSubstitution(self, arg, attrDict={}, position=(None, None)):
key, expr = parseSubstitution(arg)
if not key:
- raise TALError("Bad syntax in insert/replace: " + `arg`)
+ raise TALError("Bad syntax in content/replace: " + `arg`, position)
cexpr = self.compileExpression(expr)
program = self.popProgram()
if key == "text":
@@ -243,10 +247,11 @@
assert key == "structure"
self.emit("insertStructure", cexpr, attrDict, program)
- def emitDefineMacro(self, macroName):
+ def emitDefineMacro(self, macroName, position=(None, None)):
program = self.popProgram()
if self.macros.has_key(macroName):
- raise METALError("duplicate macro definition: %s" % macroName)
+ raise METALError("duplicate macro definition: %s" % macroName,
+ position)
self.macros[macroName] = program
self.emit("defineMacro", macroName, program)
@@ -259,13 +264,36 @@
program = self.popProgram()
self.emit("defineSlot", slotName, program)
- def emitFillSlot(self, slotName):
+ def emitFillSlot(self, slotName, position=(None, None)):
program = self.popProgram()
if self.slots.has_key(slotName):
- raise METALError("duplicate slot definition: %s" % slotName)
+ raise METALError("duplicate slot definition: %s" % slotName,
+ position)
self.slots[slotName] = program
self.emit("fillSlot", slotName, program)
+ def unEmitWhitespace(self):
+ collect = []
+ i = len(self.program) - 1
+ while i >= 0:
+ item = self.program[i]
+ if item[0] != "rawtext":
+ break
+ text = item[1]
+ if not re.match(r"\A\s*\Z", text):
+ break
+ collect.append(text)
+ i = i-1
+ del self.program[i+1:]
+ if i >= 0 and self.program[i][0] == "rawtext":
+ text = self.program[i][1]
+ m = re.search(r"\s+\Z", text)
+ if m:
+ self.program[i] = ("rawtext", text[:m.start()])
+ collect.append(m.group())
+ collect.reverse()
+ return string.join(collect, "")
+
def unEmitNewlineWhitespace(self):
collect = []
i = len(self.program)
@@ -306,7 +334,7 @@
return newlist
def emitStartElement(self, name, attrlist, taldict, metaldict,
- position=(None, None)):
+ position=(None, None), isend=0):
for key in taldict.keys():
if key not in KNOWN_TAL_ATTRIBUTES:
raise TALError("bad TAL attribute: " + `key`, position)
@@ -380,46 +408,56 @@
if replace:
todo["repldict"] = repldict
repldict = {}
- self.emitStartTag(name, self.replaceAttrs(attrlist, repldict))
+ self.emitStartTag(name, self.replaceAttrs(attrlist, repldict), isend)
if content:
self.pushProgram()
+ if todo and position != (None, None):
+ todo["position"] = position
self.todoPush(todo)
+ if isend:
+ self.emitEndElement(name, isend)
- def emitEndElement(self, name):
+ def emitEndElement(self, name, isend=0):
todo = self.todoPop()
if not todo:
# Shortcut
- self.emitEndTag(name)
+ if not isend:
+ self.emitEndTag(name)
return
+
+ position = todo.get("position", (None, None))
+ defineMacro = todo.get("defineMacro")
+ useMacro = todo.get("useMacro")
+ defineSlot = todo.get("defineSlot")
+ fillSlot = todo.get("fillSlot")
content = todo.get("content")
- if content:
- self.emitSubstitution(content)
- self.emitEndTag(name)
repeat = todo.get("repeat")
+ replace = todo.get("replace")
+ condition = todo.get("condition")
+ define = todo.get("define")
+ repldict = todo.get("repldict", {})
+
+ if content:
+ self.emitSubstitution(content, {}, position)
+ if not isend:
+ self.emitEndTag(name)
if repeat:
- self.emitRepeat(repeat)
+ self.emitRepeat(repeat, position)
self.emit("endScope")
- replace = todo.get("replace")
if replace:
- repldict = todo.get("repldict", {})
- self.emitSubstitution(replace, repldict)
- condition = todo.get("condition")
+ self.emitSubstitution(replace, repldict, position)
if condition:
self.emitCondition(condition)
- if todo.get("define"):
+ if define:
self.emit("endScope")
- defineMacro = todo.get("defineMacro")
- useMacro = todo.get("useMacro")
- defineSlot = todo.get("defineSlot")
- fillSlot = todo.get("fillSlot")
if defineMacro:
- self.emitDefineMacro(defineMacro)
+ self.emitDefineMacro(defineMacro, position)
if useMacro:
self.emitUseMacro(useMacro)
if defineSlot:
self.emitDefineSlot(defineSlot)
if fillSlot:
- self.emitFillSlot(fillSlot)
+ self.emitFillSlot(fillSlot, position)
def test():
t = TALGenerator()