[ZPT] CVS: Packages/TAL - HTMLParser.py:1.14
guido@digicool.com
guido@digicool.com
Mon, 9 Apr 2001 10:15:20 -0400 (EDT)
Update of /cvs-repository/Packages/TAL
In directory korak:/tmp/cvs-serv14537
Modified Files:
HTMLParser.py
Log Message:
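The cdata handling code for <script> and <style> could be confused when a
buffer boundary fell in the middle of the cdata stretch.
Fixed this by also treating a '<' at the very end of the buffer as
interesting while in cdata mode, and added a clear_cdata_mode() callback
that is invoked after a successful </endtag>; cdata mode is no longer left
on the first match. (I hope this doesn't break other things. This parser is
getting horribly ad-hoc. :-( )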
--- Updated File HTMLParser.py in package Packages/TAL --
--- HTMLParser.py 2001/04/06 22:23:31 1.13
+++ HTMLParser.py 2001/04/09 14:15:19 1.14
@@ -14,7 +14,7 @@
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
-interesting_cdata = re.compile('</')
+interesting_cdata = re.compile(r'<(/|\Z)')
incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
@@ -144,6 +144,9 @@
def set_cdata_mode(self):
self.interesting = interesting_cdata
+ def clear_cdata_mode(self):
+ self.interesting = interesting_normal
+
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
@@ -155,7 +158,6 @@
match = self.interesting.search(rawdata, i) # < or &
if match:
j = match.start()
- self.interesting = interesting_normal
else:
j = n
if i < j: self.handle_data(rawdata[i:j])
@@ -166,6 +168,8 @@
k = self.parse_starttag(i)
elif endtagopen.match(rawdata, i): # </
k = self.parse_endtag(i)
+ if k >= 0:
+ self.clear_cdata_mode()
elif commentopen.match(rawdata, i): # <!--
k = self.parse_comment(i)
elif piopen.match(rawdata, i): # <?