[Zpt] CVS: Packages/TAL - nsgmllib.py:1.6

fred@digiciool.com fred@digiciool.com
Thu, 15 Mar 2001 12:10:08 -0500 (EST)


Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv17679

Modified Files:
	nsgmllib.py 
Log Message:

Remove the code that supports SGML null-end-tag shorthand (<span/.../);
this is *never* used with HTML.  Removing it simply reduces the number
of special cases that clutter the start-tag parsing.

Add support for XHTML-style empty-element tags (<img src="foo.gif" />).



--- Updated File nsgmllib.py in package Packages/TAL --
--- nsgmllib.py	2001/03/15 16:35:38	1.5
+++ nsgmllib.py	2001/03/15 17:10:07	1.6
@@ -25,8 +25,6 @@
 charref = re.compile('&#([0-9]+)[^0-9]')
 
 starttagopen = re.compile('<[a-zA-Z]')
-shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
-shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
 piopen = re.compile('<\?')
 piclose = re.compile('>')
 endtagopen = re.compile('</[a-zA-Z]')
@@ -217,25 +215,11 @@
         self.__starttag_text = None
         start_pos = i
         rawdata = self.rawdata
-        if shorttagopen.match(rawdata, i):
-            # SGML shorthand: <tag/data/ == <tag>data</tag>
-            # XXX Can data contain &... (entity or char refs)?
-            # XXX Can data contain < or > (tag characters)?
-            # XXX Can there be whitespace before the first /?
-            match = shorttag.match(rawdata, i)
-            if not match:
-                return -1
-            tag, data = match.group(1, 2)
-            self.__starttag_text = '<%s/' % tag
-            tag = string.lower(tag)
-            k = match.end(0)
-            self.finish_shorttag(tag, data)
-            self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
-            return k
         # XXX The following should skip matching quotes (' or ")
         match = endbracket.search(rawdata, i+1)
         if not match:
             return -1
+        self.__starttag_text = rawdata[i:match.end()]
         j = match.start(0)
         # Now parse the data between i+1 and j into a tag and attrs
         attrs = []
@@ -246,20 +230,26 @@
         tag = string.lower(rawdata[i+1:k])
         self.lasttag = tag
         while k < j:
-            match = attrfind.match(rawdata, k)
-            if not match: break
-            attrname, rest, attrvalue = match.group(1, 2, 3)
+            m = attrfind.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
             if not rest:
                 attrvalue = attrname
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                  attrvalue[:1] == '"' == attrvalue[-1:]:
                 attrvalue = attrvalue[1:-1]
             attrs.append((string.lower(attrname), attrvalue))
-            k = match.end(0)
-        if rawdata[j] == '>':
-            j = j+1
-        self.__starttag_text = rawdata[start_pos:j]
+            k = m.end(0)
+        if rawdata[j:j+1] == '/>':
+            explicit_empty = 1
+            j = j + 2
+        elif rawdata[j] == '>':
+            j = j + 1
         self.finish_starttag(tag, attrs)
+        if self.__starttag_text[-2:] == '/>':
+            # XHTML-style empty tag: <span attr="value" />
+            self.finish_endtag(tag)
         return j
 
     # Internal -- parse endtag
@@ -274,12 +264,6 @@
             j = j+1
         self.finish_endtag(tag)
         return j
-
-    # Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
-    def finish_shorttag(self, tag, data):
-        self.finish_starttag(tag, [])
-        self.handle_data(data)
-        self.finish_endtag(tag)
 
     # Internal -- finish processing of start tag
     # Return -1 for unknown tag, 0 for open-only tag, 1 for balanced tag