[Zope3-checkins] SVN: Zope3/trunk/src/zope/ Improved content type
guessing heuristic in zope.publisher.browser. Now
Marius Gedminas
marius at pov.lt
Fri Jun 11 14:22:00 EDT 2004
Log message for revision 25364:
Improved content type guessing heuristic in zope.publisher.browser. Now
source annotations will not result in some pages being returned as text/plain.
Removed comments implicating Mozilla which is completely innocent. Removed
code that tried to work around this bug by postponing annotations after
doctype declarations. Made the content-type guesser add a header:
    X-Content-Type-Warning: guessed from content
In the future all page templates should declare their content types explicitly
and if they do not, a warning should be displayed in the event log.
-=-
Modified: Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py
===================================================================
--- Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py 2004-06-11 18:22:00 UTC (rev 25364)
@@ -287,6 +287,7 @@
'Status: 200 Ok\r\n'
'Content-Length: 4\r\n'
'Content-Type: text/plain;charset=utf-8\r\n'
+ 'X-Content-Type-Warning: guessed from content\r\n'
'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
'\r\nspam'
)
@@ -304,6 +305,7 @@
'Status: 200 Ok\r\n'
'Content-Length: 0\r\n'
'Content-Type: text/plain;charset=utf-8\r\n'
+ 'X-Content-Type-Warning: guessed from content\r\n'
'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
'\r\n'
)
@@ -319,6 +321,7 @@
'Status: 200 Ok\r\n'
'Content-Length: 8\r\n'
'Content-Type: text/plain;charset=utf-8\r\n'
+ 'X-Content-Type-Warning: guessed from content\r\n'
'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
'\r\n\xd1\x82\xd0\xb5\xd1\x81\xd1\x82')
Modified: Zope3/trunk/src/zope/publisher/browser.py
===================================================================
--- Zope3/trunk/src/zope/publisher/browser.py 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/publisher/browser.py 2004-06-11 18:22:00 UTC (rev 25364)
@@ -653,6 +653,9 @@
if self._charset is not None:
c += ';charset=' + self._charset
self.setHeader('content-type', c)
+ self.setHeader('x-content-type-warning', 'guessed from content')
+ # XXX emit a warning once all page templates are changed to
+ # specify their content type explicitly.
body = self.__insertBase(body)
self._body = body
@@ -661,9 +664,17 @@
self.setStatus(200)
def __isHTML(self, str):
- s = str.strip().lower()
- return ((s.startswith('<html') and (s[5:6] in ' >'))
- or s.startswith('<!doctype html'))
+ """Try to determine whether str is HTML or not."""
+ s = str.lstrip().lower()
+ if s.startswith('<!doctype html'):
+ return True
+ if s.startswith('<html') and (s[5:6] in ' >'):
+ return True
+ if s.startswith('<!--'):
+ idx = s.find('<html')
+ return idx > 0 and (s[idx+5:idx+6] in ' >')
+ else:
+ return False
def __wrapInHTML(self, title, content):
Modified: Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py
===================================================================
--- Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py 2004-06-11 18:22:00 UTC (rev 25364)
@@ -127,6 +127,7 @@
"Status: 200 Ok\r\n"
"Content-Length: 7\r\n"
"Content-Type: text/plain;charset=utf-8\r\n"
+ "X-Content-Type-Warning: guessed from content\r\n"
"X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n"
"\r\n"
"u'5', 6")
Modified: Zope3/trunk/src/zope/tal/talinterpreter.py
===================================================================
--- Zope3/trunk/src/zope/tal/talinterpreter.py 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/talinterpreter.py 2004-06-11 18:22:00 UTC (rev 25364)
@@ -102,9 +102,8 @@
1. setPosition bytecode follows setSourceFile, and we need position
information to output the line number.
- 2. Mozilla does not cope with HTML comments that occur before
- <!DOCTYPE> (XXX file a bug into bugzilla.mozilla.org as comments
- are legal there according to HTML4 spec).
+ 2. Comments are not allowed in XML documents before the <?xml?>
+ declaration.
For performance reasons (XXX premature optimization?) instead of checking
the value of _pending_source_annotation on every write to the output
@@ -257,17 +256,13 @@
self._stream_write = self.stream.write
def _annotated_stream_write(self, s):
- idx = s.find('<!DOCTYPE')
- if idx == -1:
- idx = s.find('<?xml')
+ idx = s.find('<?xml')
if idx >= 0 or s.isspace():
- # Do *not* preprend comments in front of the <!DOCTYPE> or
- # <?xml?> declaration! Although that is completely legal according
- # to w3c.org, Mozilla chokes on such pages.
- end_of_doctype = s.find('>', idx)
+ # Do not preprend comments in front of the <?xml?> declaration.
+ end_of_doctype = s.find('?>', idx)
if end_of_doctype > idx:
- self.stream.write(s[:end_of_doctype+1])
- s = s[end_of_doctype+1:]
+ self.stream.write(s[:end_of_doctype+2])
+ s = s[end_of_doctype+2:]
# continue
else:
self.stream.write(s)
Modified: Zope3/trunk/src/zope/tal/tests/output/test_sa2.html
===================================================================
--- Zope3/trunk/src/zope/tal/tests/output/test_sa2.html 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/output/test_sa2.html 2004-06-11 18:22:00 UTC (rev 25364)
@@ -1,10 +1,10 @@
-<!DOCTYPE html
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "DTD/xhtml1-transitional.dtd"><!--
+<!--
==============================================================================
tests/input/test_sa2.html
==============================================================================
--->
+--><!DOCTYPE html
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "DTD/xhtml1-transitional.dtd">
<html>
<title>Simple test of source annotations</title>
<body>
Modified: Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml
===================================================================
--- Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml 2004-06-11 18:22:00 UTC (rev 25364)
@@ -1,11 +1,11 @@
-<?xml version="1.0" ?>
-<!DOCTYPE html
- PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "DTD/xhtml1-transitional.dtd"><!--
+<?xml version="1.0" ?><!--
==============================================================================
tests/input/test_sa2.xml
==============================================================================
-->
+<!DOCTYPE html
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "DTD/xhtml1-transitional.dtd">
<html>
<title>Simple test of source annotations</title>
<body>
Modified: Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py
===================================================================
--- Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py 2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py 2004-06-11 18:22:00 UTC (rev 25364)
@@ -410,15 +410,11 @@
test_cases = [
'@some text',
'\n',
- '<!DOCTYPE ...>@some text',
- ' <!DOCTYPE ...>@some text',
- '\n<!DOCTYPE ...>@some text',
- '<!DOCTYPE ...',
- '<?xml ...>@some text',
- ' <?xml ...>@some text',
- '\n<?xml ...>@some text',
+ '<?xml ...?>@some text',
+ ' <?xml ...?>@some text',
+ '\n<?xml ...?>@some text',
'<?xml ...',
- '<?xml ...?>\n<!DOCTYPE ...>@some text',
+ '<?xml ...?>@\n<!DOCTYPE ...>some text',
]
for output in test_cases:
input = output.replace('@', '')
More information about the Zope3-Checkins
mailing list