[Zope-Checkins] SVN: Zope/trunk/lib/python/TAL/ remove local version of the HTMLParser module; this is now part of Python

Wed Jul 14 16:43:45 EDT 2004

Log message for revision 26536:
  remove local version of the HTMLParser module; this is now part of Python


Changed:
  D   Zope/trunk/lib/python/TAL/HTMLParser.py
  D   Zope/trunk/lib/python/TAL/tests/test_htmlparser.py


-=-
Deleted: Zope/trunk/lib/python/TAL/HTMLParser.py
===================================================================

--- Zope/trunk/lib/python/TAL/HTMLParser.py	2004-07-14 20:30:06 UTC (rev 26535)
+++ Zope/trunk/lib/python/TAL/HTMLParser.py	2004-07-14 20:43:44 UTC (rev 26536)
@@ -1,402 +0,0 @@
-"""A parser for HTML and XHTML."""
-
-# This file is based on sgmllib.py, but the API is slightly different.
-
-# XXX There should be a way to distinguish between PCDATA (parsed
-# character data -- the normal case), RCDATA (replaceable character
-# data -- only char and entity references and end tags are special)
-# and CDATA (character data -- only end tags are special).
-
-
-import markupbase
-import re
-
-# Regular expressions used for parsing
-
-interesting_normal = re.compile('[&<]')
-interesting_cdata = re.compile(r'<(/|\Z)')
-incomplete = re.compile('&[a-zA-Z#]')
-
-entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
-charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
-
-starttagopen = re.compile('<[a-zA-Z]')
-piclose = re.compile('>')
-endtagopen = re.compile('</')
-commentclose = re.compile(r'--\s*>')
-tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
-attrfind = re.compile(
-    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
-
-locatestarttagend = re.compile(r"""
-  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
-  (?:\s+                             # whitespace before attribute name
-    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
-      (?:\s*=\s*                     # value indicator
-        (?:'[^']*'                   # LITA-enclosed value
-          |\"[^\"]*\"                # LIT-enclosed value
-          |[^'\">\s]+                # bare value
-         )
-       )?
-     )
-   )*
-  \s*                                # trailing whitespace
-""", re.VERBOSE)
-endendtag = re.compile('>')
-endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
-
-
-class HTMLParseError(Exception):
-    """Exception raised for all parse errors."""
-
-    def __init__(self, msg, position=(None, None)):
-        assert msg
-        self.msg = msg
-        self.lineno = position[0]
-        self.offset = position[1]
-
-    def __str__(self):
-        result = self.msg
-        if self.lineno is not None:
-            result = result + ", at line %d" % self.lineno
-        if self.offset is not None:
-            result = result + ", column %d" % (self.offset + 1)
-        return result
-
-
-def _contains_at(s, sub, pos):
-    return s[pos:pos+len(sub)] == sub
-
-
-class HTMLParser(markupbase.ParserBase):
-    """Find tags and other markup and call handler functions.
-
-    Usage:
-        p = HTMLParser()
-        p.feed(data)
-        ...
-        p.close()
-
-    Start tags are handled by calling self.handle_starttag() or
-    self.handle_startendtag(); end tags by self.handle_endtag().  The
-    data between tags is passed from the parser to the derived class
-    by calling self.handle_data() with the data as argument (the data
-    may be split up in arbitrary chunks).  Entity references are
-    passed by calling self.handle_entityref() with the entity
-    reference as the argument.  Numeric character references are
-    passed to self.handle_charref() with the string containing the
-    reference as the argument.
-    """
-
-    CDATA_CONTENT_ELEMENTS = ("script", "style")
-
-
-    def __init__(self):
-        """Initialize and reset this instance."""
-        self.reset()
-
-    def reset(self):
-        """Reset this instance.  Loses all unprocessed data."""
-        self.rawdata = ''
-        self.stack = []
-        self.lasttag = '???'
-        self.interesting = interesting_normal
-        markupbase.ParserBase.reset(self)
-
-    def feed(self, data):
-        """Feed data to the parser.
-
-        Call this as often as you want, with as little or as much text
-        as you want (may include '\n').
-        """
-        self.rawdata = self.rawdata + data
-        self.goahead(0)
-
-    def close(self):
-        """Handle any buffered data."""
-        self.goahead(1)
-
-    def error(self, message):
-        raise HTMLParseError(message, self.getpos())
-
-    __starttag_text = None
-
-    def get_starttag_text(self):
-        """Return full source of start tag: '<...>'."""
-        return self.__starttag_text
-
-    cdata_endtag = None
-
-    def set_cdata_mode(self, endtag=None):
-        self.cdata_endtag = endtag
-        self.interesting = interesting_cdata
-
-    def clear_cdata_mode(self):
-        self.cdata_endtag = None
-        self.interesting = interesting_normal
-
-    # Internal -- handle data as far as reasonable.  May leave state
-    # and data to be processed by a subsequent call.  If 'end' is
-    # true, force handling all data as if followed by EOF marker.
-    def goahead(self, end):
-        rawdata = self.rawdata
-        i = 0
-        n = len(rawdata)
-        while i < n:
-            match = self.interesting.search(rawdata, i) # < or &
-            if match:
-                j = match.start()
-            else:
-                j = n
-            if i < j: self.handle_data(rawdata[i:j])
-            i = self.updatepos(i, j)
-            if i == n: break
-            if rawdata[i] == '<':
-                if starttagopen.match(rawdata, i): # < + letter
-                    k = self.parse_starttag(i)
-                elif endtagopen.match(rawdata, i): # </
-                    k = self.parse_endtag(i)
-                elif _contains_at(rawdata, "<!--", i): # <!--
-                    k = self.parse_comment(i)
-                elif _contains_at(rawdata, "<!", i): # <!
-                    k = self.parse_declaration(i)
-                elif _contains_at(rawdata, "<?", i): # <?
-                    k = self.parse_pi(i)
-                elif _contains_at(rawdata, "<?", i): # <!
-                    k = self.parse_declaration(i)
-                elif (i + 1) < n:
-                    self.handle_data("<")
-                    k = i + 1
-                else:
-                    break
-                if k < 0:
-                    if end:
-                        self.error("EOF in middle of construct")
-                    break
-                i = self.updatepos(i, k)
-            elif rawdata[i:i+2] == "&#":
-                match = charref.match(rawdata, i)
-                if match:
-                    name = match.group()[2:-1]
-                    self.handle_charref(name)
-                    k = match.end()
-                    if rawdata[k-1] != ';':
-                        k = k - 1
-                    i = self.updatepos(i, k)
-                    continue
-                else:
-                    break
-            elif rawdata[i] == '&':
-                match = entityref.match(rawdata, i)
-                if match:
-                    name = match.group(1)
-                    self.handle_entityref(name)
-                    k = match.end()
-                    if rawdata[k-1] != ';':
-                        k = k - 1
-                    i = self.updatepos(i, k)
-                    continue
-                match = incomplete.match(rawdata, i)
-                if match:
-                    # match.group() will contain at least 2 chars
-                    rest = rawdata[i:]
-                    if end and match.group() == rest:
-                        self.error("EOF in middle of entity or char ref")
-                    # incomplete
-                    break
-                elif (i + 1) < n:
-                    # not the end of the buffer, and can't be confused
-                    # with some other construct
-                    self.handle_data("&")
-                    i = self.updatepos(i, i + 1)
-                else:
-                    break
-            else:
-                assert 0, "interesting.search() lied"
-        # end while
-        if end and i < n:
-            self.handle_data(rawdata[i:n])
-            i = self.updatepos(i, n)
-        self.rawdata = rawdata[i:]
-
-    # Internal -- parse comment, return end or -1 if not terminated
-    def parse_comment(self, i, report=1):
-        rawdata = self.rawdata
-        assert rawdata[i:i+4] == '<!--', 'unexpected call to parse_comment()'
-        match = commentclose.search(rawdata, i+4)
-        if not match:
-            return -1
-        if report:
-            j = match.start()
-            self.handle_comment(rawdata[i+4: j])
-        j = match.end()
-        return j
-
-    # Internal -- parse processing instr, return end or -1 if not terminated
-    def parse_pi(self, i):
-        rawdata = self.rawdata
-        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
-        match = piclose.search(rawdata, i+2) # >
-        if not match:
-            return -1
-        j = match.start()
-        self.handle_pi(rawdata[i+2: j])
-        j = match.end()
-        return j
-
-    # Internal -- handle starttag, return end or -1 if not terminated
-    def parse_starttag(self, i):
-        self.__starttag_text = None
-        endpos = self.check_for_whole_start_tag(i)
-        if endpos < 0:
-            return endpos
-        rawdata = self.rawdata
-        self.__starttag_text = rawdata[i:endpos]
-
-        # Now parse the data between i+1 and j into a tag and attrs
-        attrs = []
-        match = tagfind.match(rawdata, i+1)
-        assert match, 'unexpected call to parse_starttag()'
-        k = match.end()
-        self.lasttag = tag = rawdata[i+1:k].lower()
-
-        while k < endpos:
-            m = attrfind.match(rawdata, k)
-            if not m:
-                break
-            attrname, rest, attrvalue = m.group(1, 2, 3)
-            if not rest:
-                attrvalue = None
-            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                 attrvalue[:1] == '"' == attrvalue[-1:]:
-                attrvalue = attrvalue[1:-1]
-                attrvalue = self.unescape(attrvalue)
-            attrs.append((attrname.lower(), attrvalue))
-            k = m.end()
-
-        end = rawdata[k:endpos].strip()
-        if end not in (">", "/>"):
-            lineno, offset = self.getpos()
-            if "\n" in self.__starttag_text:
-                lineno = lineno + self.__starttag_text.count("\n")
-                offset = len(self.__starttag_text) \
-                         - self.__starttag_text.rfind("\n")
-            else:
-                offset = offset + len(self.__starttag_text)
-            self.error("junk characters in start tag: %s"
-                       % `rawdata[k:endpos][:20]`)
-        if end[-2:] == '/>':
-            # XHTML-style empty tag: <span attr="value" />
-            self.handle_startendtag(tag, attrs)
-        else:
-            self.handle_starttag(tag, attrs)
-            if tag in self.CDATA_CONTENT_ELEMENTS:
-                self.set_cdata_mode(tag)
-        return endpos
-
-    # Internal -- check to see if we have a complete starttag; return end
-    # or -1 if incomplete.
-    def check_for_whole_start_tag(self, i):
-        rawdata = self.rawdata
-        m = locatestarttagend.match(rawdata, i)
-        if m:
-            j = m.end()
-            next = rawdata[j:j+1]
-            if next == ">":
-                return j + 1
-            if next == "/":
-                s = rawdata[j:j+2]
-                if s == "/>":
-                    return j + 2
-                if s == "/":
-                    # buffer boundary
-                    return -1
-                # else bogus input
-                self.updatepos(i, j + 1)
-                self.error("malformed empty start tag")
-            if next == "":
-                # end of input
-                return -1
-            if next in ("abcdefghijklmnopqrstuvwxyz=/"
-                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
-                # end of input in or before attribute value, or we have the
-                # '/' from a '/>' ending
-                return -1
-            self.updatepos(i, j)
-            self.error("malformed start tag")
-        raise AssertionError("we should not get here!")
-
-    # Internal -- parse endtag, return end or -1 if incomplete
-    def parse_endtag(self, i):
-        rawdata = self.rawdata
-        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
-        match = endendtag.search(rawdata, i+1) # >
-        if not match:
-            return -1
-        j = match.end()
-        match = endtagfind.match(rawdata, i) # </ + tag + >
-        if not match:
-            self.error("bad end tag: %s" % `rawdata[i:j]`)
-        tag = match.group(1).lower()
-        if (  self.cdata_endtag is not None
-              and tag != self.cdata_endtag):
-            # Should be a mismatched end tag, but we'll treat it
-            # as text anyway, since most HTML authors aren't
-            # interested in the finer points of syntax.
-            self.handle_data(match.group(0))
-        else:
-            self.handle_endtag(tag)
-            self.clear_cdata_mode()
-        return j
-
-    # Overridable -- finish processing of start+end tag: <tag.../>
-    def handle_startendtag(self, tag, attrs):
-        self.handle_starttag(tag, attrs)
-        self.handle_endtag(tag)
-
-    # Overridable -- handle start tag
-    def handle_starttag(self, tag, attrs):
-        pass
-
-    # Overridable -- handle end tag
-    def handle_endtag(self, tag):
-        pass
-
-    # Overridable -- handle character reference
-    def handle_charref(self, name):
-        pass
-
-    # Overridable -- handle entity reference
-    def handle_entityref(self, name):
-        pass
-
-    # Overridable -- handle data
-    def handle_data(self, data):
-        pass
-
-    # Overridable -- handle comment
-    def handle_comment(self, data):
-        pass
-
-    # Overridable -- handle declaration
-    def handle_decl(self, decl):
-        pass
-
-    # Overridable -- handle processing instruction
-    def handle_pi(self, data):
-        pass
-
-    def unknown_decl(self, data):
-        self.error("unknown declaration: " + `data`)
-
-    # Internal -- helper to remove special character quoting
-    def unescape(self, s):
-        if '&' not in s:
-            return s
-        s = s.replace("&lt;", "<")
-        s = s.replace("&gt;", ">")
-        s = s.replace("&apos;", "'")
-        s = s.replace("&quot;", '"')
-        s = s.replace("&amp;", "&") # Must be last
-        return s

Deleted: Zope/trunk/lib/python/TAL/tests/test_htmlparser.py
===================================================================
--- Zope/trunk/lib/python/TAL/tests/test_htmlparser.py	2004-07-14 20:30:06 UTC (rev 26535)
+++ Zope/trunk/lib/python/TAL/tests/test_htmlparser.py	2004-07-14 20:43:44 UTC (rev 26536)
@@ -1,313 +0,0 @@
-#! /usr/bin/env python1.5
-"""Tests for HTMLParser.py."""
-
-import sys
-
-from TAL.tests import utils
-import unittest
-
-from TAL import HTMLParser
-
-
-class EventCollector(HTMLParser.HTMLParser):
-
-    def __init__(self):
-        self.events = []
-        self.append = self.events.append
-        HTMLParser.HTMLParser.__init__(self)
-
-    def get_events(self):
-        # Normalize the list of events so that buffer artefacts don't
-        # separate runs of contiguous characters.
-        L = []
-        prevtype = None
-        for event in self.events:
-            type = event[0]
-            if type == prevtype == "data":
-                L[-1] = ("data", L[-1][1] + event[1])
-            else:
-                L.append(event)
-            prevtype = type
-        self.events = L
-        return L
-
-    # structure markup
-
-    def handle_starttag(self, tag, attrs):
-        self.append(("starttag", tag, attrs))
-
-    def handle_startendtag(self, tag, attrs):
-        self.append(("startendtag", tag, attrs))
-
-    def handle_endtag(self, tag):
-        self.append(("endtag", tag))
-
-    # all other markup
-
-    def handle_comment(self, data):
-        self.append(("comment", data))
-
-    def handle_charref(self, data):
-        self.append(("charref", data))
-
-    def handle_data(self, data):
-        self.append(("data", data))
-
-    def handle_decl(self, data):
-        self.append(("decl", data))
-
-    def handle_entityref(self, data):
-        self.append(("entityref", data))
-
-    def handle_pi(self, data):
-        self.append(("pi", data))
-
-    def unknown_decl(self, decl):
-        self.append(("unknown decl", decl))
-
-
-class EventCollectorExtra(EventCollector):
-
-    def handle_starttag(self, tag, attrs):
-        EventCollector.handle_starttag(self, tag, attrs)
-        self.append(("starttag_text", self.get_starttag_text()))
-
-
-class TestCaseBase(unittest.TestCase):
-
-    # Constant pieces of source and events
-    prologue = ""
-    epilogue = ""
-    initial_events = []
-    final_events = []
-
-    def _run_check(self, source, events, collector=EventCollector):
-        parser = collector()
-        parser.feed(self.prologue)
-        for s in source:
-            parser.feed(s)
-        for c in self.epilogue:
-            parser.feed(c)
-        parser.close()
-        self.assert_(parser.get_events() ==
-                     self.initial_events + events + self.final_events,
-                     parser.get_events())
-
-    def _run_check_extra(self, source, events):
-        self._run_check(source, events, EventCollectorExtra)
-
-    def _parse_error(self, source):
-        def parse(source=source):
-            parser = HTMLParser.HTMLParser()
-            parser.feed(source)
-            parser.close()
-        self.assertRaises(HTMLParser.HTMLParseError, parse)
-
-
-class HTMLParserTestCase(TestCaseBase):
-
-    def check_processing_instruction_only(self):
-        self._run_check("<?processing instruction>", [
-            ("pi", "processing instruction"),
-            ])
-
-    def check_simple_html(self):
-        self._run_check("""
-<!DOCTYPE html PUBLIC 'foo'>
-<HTML>&entity;&#32;
-<!--comment1a
--></foo><bar>&lt;<?pi?></foo<bar
-comment1b-->
-<Img sRc='Bar' isMAP>sample
-text
-&#x201C;
-<!--comment2a-- --comment2b-->
-</Html>
-""", [
-    ("data", "\n"),
-    ("decl", "DOCTYPE html PUBLIC 'foo'"),
-    ("data", "\n"),
-    ("starttag", "html", []),
-    ("entityref", "entity"),
-    ("charref", "32"),
-    ("data", "\n"),
-    ("comment", "comment1a\n-></foo><bar>&lt;<?pi?></foo<bar\ncomment1b"),
-    ("data", "\n"),
-    ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
-    ("data", "sample\ntext\n"),
-    ("charref", "x201C"),
-    ("data", "\n"),
-    ("comment", "comment2a-- --comment2b"),
-    ("data", "\n"),
-    ("endtag", "html"),
-    ("data", "\n"),
-    ])
-
-    def check_unclosed_entityref(self):
-        self._run_check("&entityref foo", [
-            ("entityref", "entityref"),
-            ("data", " foo"),
-            ])
-
-    def check_doctype_decl(self):
-        inside = """\
-DOCTYPE html [
-  <!ELEMENT html - O EMPTY>
-  <!ATTLIST html
-      version CDATA #IMPLIED
-      profile CDATA 'DublinCore'>
-  <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
-  <!ENTITY myEntity 'internal parsed entity'>
-  <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
-  <!ENTITY % paramEntity 'name|name|name'>
-  %paramEntity;
-  <!-- comment -->
-]"""
-        self._run_check("<!%s>" % inside, [
-            ("decl", inside),
-            ])
-
-    def check_bad_nesting(self):
-        # Strangely, this *is* supposed to test that overlapping
-        # elements are allowed.  HTMLParser is more geared toward
-        # lexing the input than parsing the structure.
-        self._run_check("<a><b></a></b>", [
-            ("starttag", "a", []),
-            ("starttag", "b", []),
-            ("endtag", "a"),
-            ("endtag", "b"),
-            ])
-
-    def check_bare_ampersands(self):
-        self._run_check("this text & contains & ampersands &", [
-            ("data", "this text & contains & ampersands &"),
-            ])
-
-    def check_bare_pointy_brackets(self):
-        self._run_check("this < text > contains < bare>pointy< brackets", [
-            ("data", "this < text > contains < bare>pointy< brackets"),
-            ])
-
-    def check_attr_syntax(self):
-        output = [
-          ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
-          ]
-        self._run_check("""<a b='v' c="v" d=v e>""", output)
-        self._run_check("""<a  b = 'v' c = "v" d = v e>""", output)
-        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
-        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
-
-    def check_attr_values(self):
-        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
-                        [("starttag", "a", [("b", "xxx\n\txxx"),
-                                            ("c", "yyy\t\nyyy"),
-                                            ("d", "\txyz\n")])
-                         ])
-        self._run_check("""<a b='' c="">""", [
-            ("starttag", "a", [("b", ""), ("c", "")]),
-            ])
-
-    def check_attr_entity_replacement(self):
-        self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
-            ("starttag", "a", [("b", "&><\"'")]),
-            ])
-
-    def check_attr_funky_names(self):
-        self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
-            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
-            ])
-
-    def check_illegal_declarations(self):
-        self._parse_error('<!spacer type="block" height="25">')
-
-    def check_starttag_end_boundary(self):
-        self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
-        self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])
-
-    def check_buffer_artefacts(self):
-        output = [("starttag", "a", [("b", "<")])]
-        self._run_check(["<a b='<'>"], output)
-        self._run_check(["<a ", "b='<'>"], output)
-        self._run_check(["<a b", "='<'>"], output)
-        self._run_check(["<a b=", "'<'>"], output)
-        self._run_check(["<a b='<", "'>"], output)
-        self._run_check(["<a b='<'", ">"], output)
-
-        output = [("starttag", "a", [("b", ">")])]
-        self._run_check(["<a b='>'>"], output)
-        self._run_check(["<a ", "b='>'>"], output)
-        self._run_check(["<a b", "='>'>"], output)
-        self._run_check(["<a b=", "'>'>"], output)
-        self._run_check(["<a b='>", "'>"], output)
-        self._run_check(["<a b='>'", ">"], output)
-
-    def check_starttag_junk_chars(self):
-        self._parse_error("</>")
-        self._parse_error("</$>")
-        self._parse_error("</")
-        self._parse_error("</a")
-        self._parse_error("<a<a>")
-        self._parse_error("</a<a>")
-        self._parse_error("<!")
-        self._parse_error("<a $>")
-        self._parse_error("<a")
-        self._parse_error("<a foo='bar'")
-        self._parse_error("<a foo='bar")
-        self._parse_error("<a foo='>'")
-        self._parse_error("<a foo='>")
-        self._parse_error("<a foo=>")
-
-    def check_declaration_junk_chars(self):
-        self._parse_error("<!DOCTYPE foo $ >")
-
-    def check_startendtag(self):
-        self._run_check("<p/>", [
-            ("startendtag", "p", []),
-            ])
-        self._run_check("<p></p>", [
-            ("starttag", "p", []),
-            ("endtag", "p"),
-            ])
-        self._run_check("<p><img src='foo' /></p>", [
-            ("starttag", "p", []),
-            ("startendtag", "img", [("src", "foo")]),
-            ("endtag", "p"),
-            ])
-
-    def check_get_starttag_text(self):
-        s = """<foo:bar   \n   one="1"\ttwo=2   >"""
-        self._run_check_extra(s, [
-            ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
-            ("starttag_text", s)])
-
-    def check_cdata_content(self):
-        s = """<script> <!-- not a comment --> &not-an-entity-ref; </script>"""
-        self._run_check(s, [
-            ("starttag", "script", []),
-            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
-            ("endtag", "script"),
-            ])
-        s = """<script> <not a='start tag'> </script>"""
-        self._run_check(s, [
-            ("starttag", "script", []),
-            ("data", " <not a='start tag'> "),
-            ("endtag", "script"),
-            ])
-
-    def check_enumerated_attr_type(self):
-        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
-        self._run_check(s, [
-            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
-            ])
-
-
-# Support for the Zope regression test framework:
-def test_suite():
-    suite = unittest.TestSuite()
-    suite.addTest(unittest.makeSuite(HTMLParserTestCase, "check_"))
-    return suite
-
-
-if __name__ == "__main__":
-    errs = utils.run_suite(test_suite())
-    sys.exit(errs and 1 or 0)