[ZPT] CVS: Packages/TAL/tests - test_htmlparser.py:1.15
"Fred L. Drake" <fdrake@acm.org>
"Fred L. Drake" <fdrake@acm.org>
Tue, 4 Sep 2001 10:53:26 -0400
Update of /cvs-repository/Packages/TAL/tests
In directory cvs.zope.org:/tmp/cvs-serv20772
Modified Files:
test_htmlparser.py
Log Message:
Add more tests to cover edge cases that are legal in either HTML (bare
pointy brackets & ampersands) or XHTML (hexadecimal character references),
but not both. Also added a test for DOCTYPE declaration parsing.
=== Packages/TAL/tests/test_htmlparser.py 1.14 => 1.15 ===
self.append(("pi", data))
+ def unknown_decl(self, decl):
+ self.append(("unknown decl", decl))
+
class EventCollectorExtra(EventCollector):
@@ -117,6 +120,7 @@
comment1b-->
<Img sRc='Bar' isMAP>sample
text
+&#x201C;
<!--comment2a-- --comment2b-->
</Html>
""", [
@@ -131,13 +135,36 @@
("data", "\n"),
("starttag", "img", [("src", "Bar"), ("ismap", None)]),
("data", "sample\ntext\n"),
+ ("charref", "x201C"),
+ ("data", "\n"),
("comment", "comment2a-- --comment2b"),
("data", "\n"),
("endtag", "html"),
("data", "\n"),
])
+ def check_doctype_decl(self):
+ inside = """\
+DOCTYPE html [
+ <!ELEMENT html - O EMPTY>
+ <!ATTLIST html
+ version CDATA #IMPLIED
+ profile CDATA 'DublinCore'>
+ <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
+ <!ENTITY myEntity 'internal parsed entity'>
+ <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
+ <!ENTITY % paramEntity 'name|name|name'>
+ %paramEntity;
+ <!-- comment -->
+]"""
+ self._run_check("<!%s>" % inside, [
+ ("decl", inside),
+ ])
+
def check_bad_nesting(self):
+ # Strangely, this *is* supposed to test that overlapping
+ # elements are allowed. HTMLParser is more geared toward
+ # lexing the input than parsing the structure.
self._run_check("<a><b></a></b>", [
("starttag", "a", []),
("starttag", "b", []),
@@ -145,6 +172,16 @@
("endtag", "b"),
])
+ def check_bare_ampersands(self):
+ self._run_check("this text & contains & ampersands &", [
+ ("data", "this text & contains & ampersands &"),
+ ])
+
+ def check_bare_pointy_brackets(self):
+ self._run_check("this < text > contains < bare>pointy< brackets", [
+ ("data", "this < text > contains < bare>pointy< brackets"),
+ ])
+
def check_attr_syntax(self):
output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
@@ -174,6 +211,14 @@
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
])
+ def check_illegal_declarations(self):
+ s = 'abc<!spacer type="block" height="25">def'
+ self._run_check(s, [
+ ("data", "abc"),
+ ("unknown decl", 'spacer type="block" height="25"'),
+ ("data", "def"),
+ ])
+
def check_starttag_end_boundary(self):
self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])
@@ -196,17 +241,12 @@
self._run_check(["<a b='>'", ">"], output)
def check_starttag_junk_chars(self):
- self._parse_error("<")
- self._parse_error("<>")
self._parse_error("</>")
self._parse_error("</$>")
self._parse_error("</")
self._parse_error("</a")
- self._parse_error("</a")
self._parse_error("<a<a>")
self._parse_error("</a<a>")
- self._parse_error("<$")
- self._parse_error("<$>")
self._parse_error("<!")
self._parse_error("<a $>")
self._parse_error("<a")