[Zpt] CVS: Packages/TAL - nsgmllib.py:1.6
fred@digiciool.com
fred@digiciool.com
Thu, 15 Mar 2001 12:10:08 -0500 (EST)
Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv17679
Modified Files:
nsgmllib.py
Log Message:
Remove the code that supported the SGML null-end-tag shorthand
(<span/.../); this is *never* used with HTML. Removing it simply reduces
the number of special cases that clutter the start-tag parsing.
Add support for XHTML-style empty tags (<img src="foo.gif" />).
--- Updated File nsgmllib.py in package Packages/TAL --
--- nsgmllib.py 2001/03/15 16:35:38 1.5
+++ nsgmllib.py 2001/03/15 17:10:07 1.6
@@ -25,8 +25,6 @@
charref = re.compile('&#([0-9]+)[^0-9]')
starttagopen = re.compile('<[a-zA-Z]')
-shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
-shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
piopen = re.compile('<\?')
piclose = re.compile('>')
endtagopen = re.compile('</[a-zA-Z]')
@@ -217,25 +215,11 @@
self.__starttag_text = None
start_pos = i
rawdata = self.rawdata
- if shorttagopen.match(rawdata, i):
- # SGML shorthand: <tag/data/ == <tag>data</tag>
- # XXX Can data contain &... (entity or char refs)?
- # XXX Can data contain < or > (tag characters)?
- # XXX Can there be whitespace before the first /?
- match = shorttag.match(rawdata, i)
- if not match:
- return -1
- tag, data = match.group(1, 2)
- self.__starttag_text = '<%s/' % tag
- tag = string.lower(tag)
- k = match.end(0)
- self.finish_shorttag(tag, data)
- self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
- return k
# XXX The following should skip matching quotes (' or ")
match = endbracket.search(rawdata, i+1)
if not match:
return -1
+ self.__starttag_text = rawdata[i:match.end()]
j = match.start(0)
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
@@ -246,20 +230,26 @@
tag = string.lower(rawdata[i+1:k])
self.lasttag = tag
while k < j:
- match = attrfind.match(rawdata, k)
- if not match: break
- attrname, rest, attrvalue = match.group(1, 2, 3)
+ m = attrfind.match(rawdata, k)
+ if not m:
+ break
+ attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = attrname
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
attrs.append((string.lower(attrname), attrvalue))
- k = match.end(0)
- if rawdata[j] == '>':
- j = j+1
- self.__starttag_text = rawdata[start_pos:j]
+ k = m.end(0)
+ if rawdata[j:j+1] == '/>':
+ explicit_empty = 1
+ j = j + 2
+ elif rawdata[j] == '>':
+ j = j + 1
self.finish_starttag(tag, attrs)
+ if self.__starttag_text[-2:] == '/>':
+ # XHTML-style empty tag: <span attr="value" />
+ self.finish_endtag(tag)
return j
# Internal -- parse endtag
@@ -274,12 +264,6 @@
j = j+1
self.finish_endtag(tag)
return j
-
- # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
- def finish_shorttag(self, tag, data):
- self.finish_starttag(tag, [])
- self.handle_data(data)
- self.finish_endtag(tag)
# Internal -- finish processing of start tag
# Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag