[Zope3-checkins] SVN: Zope3/trunk/src/zope/ Improved content type guessing heuristic in zope.publisher.browser. Now

Marius Gedminas marius at pov.lt
Fri Jun 11 14:22:00 EDT 2004


Log message for revision 25364:
Improved content type guessing heuristic in zope.publisher.browser.  Now
source annotations will not result in some pages being returned as text/plain.

Removed comments implicating Mozilla which is completely innocent.  Removed
code that tried to work around this bug by postponing annotations after
doctype declarations.  Made the content-type guesser add a header

  X-Content-Type-Warning: guessed from content

In the future all page templates should declare their content types explicitly
and if they do not, a warning should be displayed in the event log.




-=-
Modified: Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py
===================================================================
--- Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/app/publication/tests/test_browserpublication.py	2004-06-11 18:22:00 UTC (rev 25364)
@@ -287,6 +287,7 @@
             'Status: 200 Ok\r\n'
             'Content-Length: 4\r\n'
             'Content-Type: text/plain;charset=utf-8\r\n'
+            'X-Content-Type-Warning: guessed from content\r\n'
             'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
             '\r\nspam'
             )
@@ -304,6 +305,7 @@
             'Status: 200 Ok\r\n'
             'Content-Length: 0\r\n'
             'Content-Type: text/plain;charset=utf-8\r\n'
+            'X-Content-Type-Warning: guessed from content\r\n'
             'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
             '\r\n'
             )
@@ -319,6 +321,7 @@
             'Status: 200 Ok\r\n'
             'Content-Length: 8\r\n'
             'Content-Type: text/plain;charset=utf-8\r\n'
+            'X-Content-Type-Warning: guessed from content\r\n'
             'X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n'
             '\r\n\xd1\x82\xd0\xb5\xd1\x81\xd1\x82')
 

Modified: Zope3/trunk/src/zope/publisher/browser.py
===================================================================
--- Zope3/trunk/src/zope/publisher/browser.py	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/publisher/browser.py	2004-06-11 18:22:00 UTC (rev 25364)
@@ -653,6 +653,9 @@
             if self._charset is not None:
                 c += ';charset=' + self._charset
             self.setHeader('content-type', c)
+            self.setHeader('x-content-type-warning', 'guessed from content')
+            # XXX emit a warning once all page templates are changed to
+            # specify their content type explicitly.
 
         body = self.__insertBase(body)
         self._body = body
@@ -661,9 +664,17 @@
             self.setStatus(200)
 
     def __isHTML(self, str):
-        s = str.strip().lower()
-        return ((s.startswith('<html') and (s[5:6] in ' >'))
-                 or s.startswith('<!doctype html'))
+        """Try to determine whether str is HTML or not."""
+        s = str.lstrip().lower()
+        if s.startswith('<!doctype html'):
+            return True
+        if s.startswith('<html') and (s[5:6] in ' >'):
+            return True
+        if s.startswith('<!--'):
+            idx = s.find('<html')
+            return idx > 0 and (s[idx+5:idx+6] in ' >')
+        else:
+            return False
 
 
     def __wrapInHTML(self, title, content):

Modified: Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py
===================================================================
--- Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/publisher/tests/test_browserrequest.py	2004-06-11 18:22:00 UTC (rev 25364)
@@ -127,6 +127,7 @@
             "Status: 200 Ok\r\n"
             "Content-Length: 7\r\n"
             "Content-Type: text/plain;charset=utf-8\r\n"
+            "X-Content-Type-Warning: guessed from content\r\n"
             "X-Powered-By: Zope (www.zope.org), Python (www.python.org)\r\n"
             "\r\n"
             "u'5', 6")

Modified: Zope3/trunk/src/zope/tal/talinterpreter.py
===================================================================
--- Zope3/trunk/src/zope/tal/talinterpreter.py	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/talinterpreter.py	2004-06-11 18:22:00 UTC (rev 25364)
@@ -102,9 +102,8 @@
 
         1. setPosition bytecode follows setSourceFile, and we need position
            information to output the line number.
-        2. Mozilla does not cope with HTML comments that occur before
-           <!DOCTYPE> (XXX file a bug into bugzilla.mozilla.org as comments
-           are legal there according to HTML4 spec).
+        2. Comments are not allowed in XML documents before the <?xml?>
+           declaration.
 
     For performance reasons (XXX premature optimization?) instead of checking
     the value of _pending_source_annotation on every write to the output
@@ -257,17 +256,13 @@
             self._stream_write = self.stream.write
 
     def _annotated_stream_write(self, s):
-        idx = s.find('<!DOCTYPE')
-        if idx == -1:
-            idx = s.find('<?xml')
+        idx = s.find('<?xml')
         if idx >= 0 or s.isspace():
-            # Do *not* preprend comments in front of the <!DOCTYPE> or
-            # <?xml?> declaration!  Although that is completely legal according
-            # to w3c.org, Mozilla chokes on such pages.
-            end_of_doctype = s.find('>', idx)
+            # Do not preprend comments in front of the <?xml?> declaration.
+            end_of_doctype = s.find('?>', idx)
             if end_of_doctype > idx:
-                self.stream.write(s[:end_of_doctype+1])
-                s = s[end_of_doctype+1:]
+                self.stream.write(s[:end_of_doctype+2])
+                s = s[end_of_doctype+2:]
                 # continue
             else:
                 self.stream.write(s)

Modified: Zope3/trunk/src/zope/tal/tests/output/test_sa2.html
===================================================================
--- Zope3/trunk/src/zope/tal/tests/output/test_sa2.html	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/output/test_sa2.html	2004-06-11 18:22:00 UTC (rev 25364)
@@ -1,10 +1,10 @@
-<!DOCTYPE html 
-  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-  "DTD/xhtml1-transitional.dtd"><!--
+<!--
 ==============================================================================
 tests/input/test_sa2.html
 ==============================================================================
--->
+--><!DOCTYPE html 
+  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+  "DTD/xhtml1-transitional.dtd">
 <html>
 <title>Simple test of source annotations</title>
 <body>

Modified: Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml
===================================================================
--- Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/output/test_sa2.xml	2004-06-11 18:22:00 UTC (rev 25364)
@@ -1,11 +1,11 @@
-<?xml version="1.0" ?>
-<!DOCTYPE html 
-  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-  "DTD/xhtml1-transitional.dtd"><!--
+<?xml version="1.0" ?><!--
 ==============================================================================
 tests/input/test_sa2.xml
 ==============================================================================
 -->
+<!DOCTYPE html 
+  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+  "DTD/xhtml1-transitional.dtd">
 <html>
 <title>Simple test of source annotations</title>
 <body>

Modified: Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py
===================================================================
--- Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py	2004-06-11 18:14:50 UTC (rev 25363)
+++ Zope3/trunk/src/zope/tal/tests/test_talinterpreter.py	2004-06-11 18:22:00 UTC (rev 25364)
@@ -410,15 +410,11 @@
         test_cases = [
             '@some text',
             '\n',
-            '<!DOCTYPE ...>@some text',
-            ' <!DOCTYPE ...>@some text',
-            '\n<!DOCTYPE ...>@some text',
-            '<!DOCTYPE ...',
-            '<?xml ...>@some text',
-            ' <?xml ...>@some text',
-            '\n<?xml ...>@some text',
+            '<?xml ...?>@some text',
+            ' <?xml ...?>@some text',
+            '\n<?xml ...?>@some text',
             '<?xml ...',
-            '<?xml ...?>\n<!DOCTYPE ...>@some text',
+            '<?xml ...?>@\n<!DOCTYPE ...>some text',
         ]
         for output in test_cases:
             input = output.replace('@', '')




More information about the Zope3-Checkins mailing list