[ZPT] CVS: Packages/TAL - HTMLParser.py:1.8 HTMLTALParser.py:1.20
guido@digicool.com
guido@digicool.com
Thu, 22 Mar 2001 12:54:14 -0500 (EST)
Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv30150
Modified Files:
HTMLParser.py HTMLTALParser.py
Log Message:
Another round of cleanup. Change finish_ to handle_.
--- Updated File HTMLParser.py in package Packages/TAL --
--- HTMLParser.py 2001/03/22 17:16:24 1.7
+++ HTMLParser.py 2001/03/22 17:54:13 1.8
@@ -296,9 +296,9 @@
(lineno, offset))
if end[-2:] == '/>':
# XHTML-style empty tag: <span attr="value" />
- self.finish_startendtag(tag, attrs)
+ self.handle_startendtag(tag, attrs)
else:
- self.finish_starttag(tag, attrs)
+ self.handle_starttag(tag, attrs)
return endpos
# Internal -- parse endtag, return end or -1 if incomplete
@@ -312,122 +312,47 @@
tag = string.lower(string.strip(rawdata[i+2:j-1]))
if not tag:
raise HTMLParseError("empty start tag", self.getpos())
- self.finish_endtag(tag)
+ self.handle_endtag(tag)
return j
# Overridable -- finish processing of start+end tag: <tag.../>
- def finish_startendtag(self, tag, attrs):
- self.finish_starttag(tag, attrs)
- self.finish_endtag(tag)
-
- # Overridable -- finish processing of start tag
- def finish_starttag(self, tag, attrs):
- try:
- method = getattr(self, 'start_' + tag)
- except AttributeError:
- try:
- method = getattr(self, 'do_' + tag)
- except AttributeError:
- self.unknown_starttag(tag, attrs)
- else:
- self.handle_starttag(tag, method, attrs)
- else:
- self.stack.append(tag)
- self.handle_starttag(tag, method, attrs)
-
- # Overridable -- finish processing of end tag
- def finish_endtag(self, tag):
- if not tag:
- found = len(self.stack) - 1
- if found < 0:
- self.unknown_endtag(tag)
- return
- else:
- if tag not in self.stack:
- try:
- method = getattr(self, 'end_' + tag)
- except AttributeError:
- self.unknown_endtag(tag)
- else:
- self.report_unbalanced(tag)
- return
- found = len(self.stack)
- for i in range(found):
- if self.stack[i] == tag: found = i
- while len(self.stack) > found:
- tag = self.stack[-1]
- try:
- method = getattr(self, 'end_' + tag)
- except AttributeError:
- method = None
- if method:
- self.handle_endtag(tag, method)
- else:
- self.unknown_endtag(tag)
- del self.stack[-1]
+ def handle_startendtag(self, tag, attrs):
+ self.handle_starttag(tag, attrs)
+ self.handle_endtag(tag)
# Overridable -- handle start tag
- def handle_starttag(self, tag, method, attrs):
- method(attrs)
+ def handle_starttag(self, tag, attrs):
+ pass
# Overridable -- handle end tag
- def handle_endtag(self, tag, method):
- method()
-
- # Example -- report an unbalanced </...> tag.
- def report_unbalanced(self, tag):
- if self.verbose:
- print '*** Unbalanced </' + tag + '>'
- print '*** Stack:', self.stack
+ def handle_endtag(self, tag):
+ pass
- # Example -- handle character reference, no need to override
+ # Overridable -- handle character reference
def handle_charref(self, name):
- try:
- n = int(name)
- except ValueError:
- self.unknown_charref(name)
- return
- if not 0 <= n <= 255:
- self.unknown_charref(name)
- return
- self.handle_data(chr(n))
-
- # Definition of entities -- derived classes may override
- entitydefs = \
- {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
+ pass
- # Example -- handle entity reference, no need to override
+ # Overridable -- handle entity reference
def handle_entityref(self, name):
- table = self.entitydefs
- if table.has_key(name):
- self.handle_data(table[name])
- else:
- self.unknown_entityref(name)
- return
+ pass
- # Example -- handle data, should be overridden
+ # Overridable -- handle data
def handle_data(self, data):
pass
- # Example -- handle comment, could be overridden
+ # Overridable -- handle comment
def handle_comment(self, data):
pass
- # Example -- handle declaration, could be overridden
+ # Overridable -- handle declaration
def handle_decl(self, decl):
pass
- # Example -- handle processing instruction, could be overridden
+ # Overridable -- handle processing instruction
def handle_pi(self, data):
pass
-
- # To be overridden -- handlers for unknown objects
- def unknown_starttag(self, tag, attrs): pass
- def unknown_endtag(self, tag): pass
- def unknown_charref(self, ref): pass
- def unknown_entityref(self, ref): pass
- # Helper to remove special character quoting
+ # Internal -- helper to remove special character quoting
def unescape(self, s):
if '&' not in s:
return s
@@ -437,94 +362,3 @@
s = string.replace(s, "&quot;", '"')
s = string.replace(s, "&amp;", "&") # Must be last
return s
-
-
-class TestHTMLParser(HTMLParser):
-
- def __init__(self, verbose=0):
- self.testdata = ""
- HTMLParser.__init__(self, verbose)
-
- def handle_data(self, data):
- self.testdata = self.testdata + data
- if len(`self.testdata`) >= 70:
- self.flush()
-
- def flush(self):
- data = self.testdata
- if data:
- self.testdata = ""
- print 'data:', `data`
-
- def handle_comment(self, data):
- self.flush()
- r = `data`
- if len(r) > 68:
- r = r[:32] + '...' + r[-32:]
- print 'comment:', r
-
- def unknown_starttag(self, tag, attrs):
- self.flush()
- if not attrs:
- print 'start tag: <' + tag + '>'
- else:
- print 'start tag: <' + tag,
- for name, value in attrs:
- print name + '=' + '"' + value + '"',
- print '>'
-
- def unknown_endtag(self, tag):
- self.flush()
- print 'end tag: </' + tag + '>'
-
- def unknown_entityref(self, ref):
- self.flush()
- print '*** unknown entity ref: &' + ref + ';'
-
- def unknown_charref(self, ref):
- self.flush()
- print '*** unknown char ref: &#' + ref + ';'
-
- def close(self):
- HTMLParser.close(self)
- self.flush()
-
-
-def test(args = None):
- import sys
-
- if not args:
- args = sys.argv[1:]
-
- if args and args[0] == '-s':
- args = args[1:]
- klass = HTMLParser
- else:
- klass = TestHTMLParser
-
- if args:
- file = args[0]
- else:
- file = 'test.html'
-
- if file == '-':
- f = sys.stdin
- else:
- try:
- f = open(file, 'r')
- except IOError, msg:
- print file, ":", msg
- sys.exit(1)
-
- data = f.read()
- if f is not sys.stdin:
- f.close()
-
- x = klass()
- for c in data:
- x.feed(c)
- x.close()
-
-
-if __name__ == '__main__':
- test()
--- Updated File HTMLTALParser.py in package Packages/TAL --
--- HTMLTALParser.py 2001/03/21 22:49:37 1.19
+++ HTMLTALParser.py 2001/03/22 17:54:13 1.20
@@ -175,7 +175,7 @@
# Overriding HTMLParser methods
- def finish_starttag(self, tag, attrs):
+ def handle_starttag(self, tag, attrs):
self.close_para_tags(tag)
self.tagstack.append(tag)
self.scan_xmlns(attrs)
@@ -185,7 +185,7 @@
if tag in EMPTY_HTML_TAGS:
self.implied_endtag(tag, -1)
- def finish_startendtag(self, tag, attrs):
+ def handle_startendtag(self, tag, attrs):
self.close_para_tags(tag)
self.scan_xmlns(attrs)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
@@ -198,7 +198,7 @@
self.getpos(), isend=1)
self.pop_xmlns()
- def finish_endtag(self, tag):
+ def handle_endtag(self, tag):
if tag in EMPTY_HTML_TAGS:
# </img> etc. in the source is an error
raise NestingError(tag, self.getpos())