[Zope-Checkins] SVN: Zope/trunk/ Merging

Wed Dec 20 11:54:07 EST 2006

Log message for revision 71628:
  Merging 
  
  /Zope/branches/ajung-zpt-encoding-fixes
  
  This branch fixes several encoding issues with the ZopePageTemplate
  implementation, some webdav issues and now uses unicode internally
  for ZPT instances (but not for the PageTemplate(File) classes)
                           
  

Changed:
  U   Zope/trunk/doc/CHANGES.txt
  U   Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py
  U   Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py
  U   Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
  A   Zope/trunk/lib/python/Products/PageTemplates/utils.py
  U   Zope/trunk/lib/python/Products/PageTemplates/www/default.html
  U   Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt

-=-
Modified: Zope/trunk/doc/CHANGES.txt
===================================================================

--- Zope/trunk/doc/CHANGES.txt	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/doc/CHANGES.txt	2006-12-20 16:54:06 UTC (rev 71628)
@@ -39,6 +39,8 @@
         until the late startup phase. This in in particular useful when running
         Zope behind a loadbalancer (patch by Patrick Gerken).
 
+      - the ZopePageTemplate implementation now uses unicode internally.        
+
     Bugs Fixed
 
       - Collector #2191: extended DateTime parser for better support

Modified: Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py	2006-12-20 16:54:06 UTC (rev 71628)
@@ -32,6 +32,14 @@
 LOG = getLogger('PageTemplateFile')
 
 def guess_type(filename, text):
+
+    # check for XML ourself since guess_content_type can't
+    # detect text/xml  if 'filename' won't end with .xml
+    # XXX: fix this in zope.contenttype
+
+    if text.startswith('<?xml'):
+        return 'text/xml'
+
     content_type, dummy = guess_content_type(filename, text)
     if content_type in ('text/html', 'text/xml'):
         return content_type

Modified: Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py	2006-12-20 16:54:06 UTC (rev 71628)
@@ -40,20 +40,14 @@
 from Products.PageTemplates.PageTemplateFile import guess_type
 from Products.PageTemplates.Expressions import SecureModuleImporter
 
-# regular expression to extract the encoding from the XML preamble
-encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M)
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+            
 
 preferred_encodings = ['utf-8', 'iso-8859-15']
 if os.environ.has_key('ZPT_PREFERRED_ENCODING'):
     preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING'])
+  
 
-def sniffEncoding(text, default_encoding='utf-8'):
-    """Try to determine the encoding from html or xml"""
-    if text.startswith('<?xml'):
-        mo = encoding_reg.search(text)
-        if mo:
-            return mo.group(1)
-    return default_encoding
 
 class Src(Acquisition.Explicit):
     """ I am scary code """
@@ -79,7 +73,6 @@
 
     func_defaults = None
     func_code = FuncCode((), 0)
-    strict = False
 
     _default_bindings = {'name_subpath': 'traverse_subpath'}
     _default_content_fn = os.path.join(package_home(globals()),
@@ -108,22 +101,68 @@
     security.declareProtected(view_management_screens,
                               'read', 'ZScriptHTML_tryForm')
 
-    def __init__(self, id, text=None, content_type=None, encoding='utf-8',
-                 strict=False):
+    def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'):
         self.id = id
         self.expand = 0                                                               
-        self.strict = strict
         self.ZBindings_edit(self._default_bindings)
+        self.output_encoding = output_encoding
+
+        # default content
         if not text:
             text = open(self._default_content_fn).read()
-            encoding = 'utf-8'
             content_type = 'text/html'
-        self.pt_edit(text, content_type, encoding)
+        self.pt_edit(text, content_type)
 
+
     security.declareProtected(change_page_templates, 'pt_edit')
-    def pt_edit(self, text, content_type, encoding='utf-8'):
+    def pt_edit(self, text, content_type, keep_output_encoding=False):
+
         text = text.strip()
-        if self.strict and not isinstance(text, unicode):
+        
+        is_unicode = isinstance(text, unicode)
+        encoding = None
+        output_encoding = None
+
+        if content_type == 'text/xml':
+
+            if is_unicode:
+                encoding = None
+                output_encoding = 'utf-8'
+            else:
+                encoding = encodingFromXMLPreamble(text)
+                output_encoding = 'utf-8'
+            
+
+        elif content_type == 'text/html':
+
+            charset = charsetFromMetaEquiv(text)
+
+            if is_unicode:
+
+                if charset:
+                    encoding = None
+                    output_encoding = charset
+                else:
+                    encoding = None
+                    output_encoding = 'iso-8859-15'
+
+            else:
+
+                if charset:
+                    encoding = charset
+                    output_encoding = charset
+                else:
+                    encoding = 'iso-8859-15'
+                    output_encoding = 'iso-8859-15'
+
+        else:
+            raise ValueError('Unsupported content-type %s' % content_type)
+
+        # for content updated through WebDAV, FTP 
+        if not keep_output_encoding:
+            self.output_encoding = output_encoding
+
+        if not is_unicode:
             text = unicode(text, encoding)
 
         self.ZCacheable_invalidate()
@@ -137,16 +176,16 @@
     source_dot_xml = Src()
 
     security.declareProtected(change_page_templates, 'pt_editAction')
-    def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand):
+    def pt_editAction(self, REQUEST, title, text, content_type, expand):
         """Change the title and document."""
 
         if self.wl_isLocked():
             raise ResourceLockedError("File is locked via WebDAV")
 
         self.expand = expand
-        self.pt_setTitle(title, encoding)
+        self.pt_setTitle(title, self.output_encoding)
 
-        self.pt_edit(text, content_type, encoding)
+        self.pt_edit(text, content_type, True)
         REQUEST.set('text', self.read()) # May not equal 'text'!
         REQUEST.set('title', self.title)
         message = "Saved changes."
@@ -157,7 +196,7 @@
 
     security.declareProtected(change_page_templates, 'pt_setTitle')
     def pt_setTitle(self, title, encoding='utf-8'):
-        if self.strict and not isinstance(title, unicode):
+        if not isinstance(title, unicode):
             title = unicode(title, encoding)
         self._setPropValue('title', title)
 
@@ -186,8 +225,7 @@
         if not content_type in ('text/html', 'text/xml'):
             raise ValueError('Unsupported mimetype: %s' % content_type)
 
-        encoding = sniffEncoding(text, encoding)
-        self.pt_edit(text, content_type, encoding)
+        self.pt_edit(text, content_type)
         return self.pt_editForm(manage_tabs_message='Saved changes')
 
     security.declareProtected(change_page_templates, 'pt_changePrefs')
@@ -240,6 +278,8 @@
         return c
 
     def write(self, text):
+        if not isinstance(text, unicode):
+            raise TypeError("'text' parameter must be unicode")
         self.ZCacheable_invalidate()
         ZopePageTemplate.inheritedAttribute('write')(self, text)
 
@@ -291,8 +331,9 @@
         """ Handle HTTP PUT requests """
         self.dav__init(REQUEST, RESPONSE)
         self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1)
-        ## XXX this should be unicode or we must pass an encoding
-        self.pt_edit(REQUEST.get('BODY', ''))
+        text = REQUEST.get('BODY', '')
+        content_type = guess_type('', text) 
+        self.pt_edit(text, content_type)
         RESPONSE.setStatus(204)
         return RESPONSE
 
@@ -303,8 +344,8 @@
     security.declareProtected(ftp_access, 'manage_FTPget')
     def manage_FTPget(self):
         "Get source for FTP download"
-        self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type)
-        return self.read()
+        result = self.pt_render()
+        return result.encode(self.output_encoding)
 
     security.declareProtected(view_management_screens, 'html')
     def html(self):
@@ -353,6 +394,12 @@
             # acquisition context, so we don't know where it is. :-(
             return None
 
+    def pt_render(self, source=False, extra_context={}):
+        result = PageTemplate.pt_render(self, source, extra_context)
+        assert isinstance(result, unicode)
+        return result
+
+
     def wl_isLocked(self):
         return 0
 
@@ -407,8 +454,8 @@
             content_type = headers['content_type']
         else:
             content_type = guess_type(filename, text) 
-        encoding = sniffEncoding(text, encoding)
 
+
     else:
         if hasattr(text, 'read'):
             filename = getattr(text, 'filename', '')
@@ -418,9 +465,14 @@
                 content_type = headers['content_type']
             else:
                 content_type = guess_type(filename, text) 
-        encoding = sniffEncoding(text, encoding)
 
-    zpt = ZopePageTemplate(id, text, content_type, encoding)
+    # ensure that we pass unicode to the constructor to
+    # avoid further hassles with pt_edit()
+
+    if not isinstance(text, unicode):
+        text = unicode(text, encoding)
+
+    zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding)
     zpt.pt_setTitle(title, encoding)
     self._setObject(id, zpt)
     zpt = getattr(self, id)

Modified: Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py	2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,3 +1,5 @@
+# -*- encoding: iso-8859-15 -*-
+
 """ZopePageTemplate regression tests.
 
 Ensures that adding a page template works correctly.
@@ -6,14 +8,163 @@
 
 """
 
-
 import unittest
 import Zope2
 import transaction
 import zope.component.testing
 from zope.traversing.adapters import DefaultTraversable
 from Testing.makerequest import makerequest
+from Testing.ZopeTestCase import ZopeTestCase, installProduct
+from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
 
+
+ascii_str = '<html><body>hello world</body></html>'
+iso885915_str = '<html><body>üöäÜÖÄß</body></html>'
+utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
+
+xml_template = '''<?xml vesion="1.0" encoding="%s"?>
+<foo>
+üöäÜÖÄß
+</foo>
+'''
+
+xml_iso_8859_15 = xml_template % 'iso-8859-15'
+xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+html_template_w_header = '''
+<html>
+    <head>
+        <META http-equiv="content-type" content="text/html; charset=%s">
+    </hed>
+    <body>
+    test üöäÜÖÄß
+    </body>
+</html>
+'''
+
+html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
+html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+html_template_wo_header = '''
+<html>
+    <body>
+    test üöäÜÖÄß
+    </body>
+</html>
+'''
+
+html_iso_8859_15_wo_header = html_template_wo_header 
+html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8') 
+
+
+installProduct('PageTemplates')
+
+class ZPTUtilsTests(unittest.TestCase):
+
+    def testExtractEncodingFromXMLPreamble(self):
+        extract = encodingFromXMLPreamble
+        self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
+        self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
+        self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
+
+    def testExtractCharsetFromMetaHTTPEquivTag(self):
+        extract = charsetFromMetaEquiv
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
+        self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
+        self.assertEqual(extract('<html>...<html>'), None)
+        
+
+class ZopePageTemplateFileTests(ZopeTestCase):
+
+    def testPT_RenderWithAscii(self):
+        manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testPT_RenderWithISO885915(self):
+        manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testPT_RenderWithUTF8(self):
+        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testWriteAcceptsUnicode(self):
+        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+        zpt = self.app['test']
+        s = u'this is unicode'
+        zpt.write(s)
+        self.assertEqual(zpt.read(), s)
+        self.assertEqual(isinstance(zpt.read(), unicode), True)
+
+    def testWriteWontAcceptsNonUnicode(self):
+        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+        zpt = self.app['test']
+        self.assertRaises(TypeError, zpt.write, 'this is not unicode')
+
+
+    def _createZPT(self):
+        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+        zpt = self.app['test']
+        return zpt
+
+    def _makePUTRequest(self, body):
+        return {'BODY' : body}
+
+    def _put(self, text):
+        zpt = self._createZPT()
+        REQUEST = self.app.REQUEST
+        REQUEST.set('BODY', text)
+        zpt.PUT(REQUEST, REQUEST.RESPONSE)
+        return zpt
+
+    def testPutHTMLIso8859_15WithCharsetInfo(self):
+        zpt = self._put(html_iso_8859_15_w_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLUTF8_WithCharsetInfo(self):
+        zpt = self._put(html_utf8_w_header)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLIso8859_15WithoutCharsetInfo(self):
+        zpt = self._put(html_iso_8859_15_wo_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLUTF8_WithoutCharsetInfo(self):
+        zpt = self._put(html_utf8_wo_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutXMLIso8859_15(self):
+        """ XML: use always UTF-8 als output encoding """
+        zpt = self._put(xml_iso_8859_15)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/xml')
+
+    def testPutXMLUTF8(self):
+        """ XML: use always UTF-8 als output encoding """
+        zpt = self._put(xml_utf8)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/xml')
+
 class ZPTRegressions(unittest.TestCase):
 
     def setUp(self):
@@ -58,14 +209,7 @@
         pt = self.app.pt1
         self.assertEqual(pt.document_src(), self.text)
 
-    def test_BBB_for_strict_attribute(self):
-        # Collector 2213:  old templates don't have 'strict' attribute.
-        from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
-        zpt = ZopePageTemplate('issue_2213')
-        del zpt.strict  # simulate old templates
-        self.assertEqual(zpt.strict, False)
 
-
 class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
 
     def setUp(self):
@@ -132,7 +276,9 @@
        
 def test_suite():
     suite = unittest.makeSuite(ZPTRegressions)
+    suite.addTests(unittest.makeSuite(ZPTUtilsTests))
     suite.addTests(unittest.makeSuite(ZPTMacros))
+    suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
     return suite
 
 if __name__ == '__main__':

Copied: Zope/trunk/lib/python/Products/PageTemplates/utils.py (from rev 71627, Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py)

Modified: Zope/trunk/lib/python/Products/PageTemplates/www/default.html
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/www/default.html	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/www/default.html	2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,6 +1,7 @@
 <html>
   <head>
     <title tal:content="template/title">The title</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8">
   </head>
   <body>
     

Modified: Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt	2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt	2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,4 +1,4 @@
-<h1 tal:replace="structure python:context.manage_page_header(management_page_charset='utf-8')">Header</h1>
+<h1 tal:replace="structure python:context.manage_page_header(management_page_charset=context.output_encoding)">Header</h1>
 <h2 tal:define="manage_tabs_message options/manage_tabs_message | nothing"
     tal:replace="structure context/manage_tabs">Tabs</h2>
 
@@ -33,7 +33,7 @@
        tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
       </div>
     </td>
-    <td align="left" valign="top" colspan="2">
+    <td align="left" valign="top" colspan="2" rowspan="2">
       <a href="source.html" tal:condition="context/html">Browse HTML source</a>
       <a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
       <br />
@@ -44,6 +44,17 @@
     </td>
   </tr>
 
+  <tr>
+    <td align="left" valign="middle">
+      <div class="form-label">Output encoding</div>
+    </td>
+    <td align="left" valign="middle">
+      <div class="form-text" 
+           tal:content="context/output_encoding"
+      />
+    </td>
+  </tr>
+
   <tr tal:define="errors context/pt_errors" tal:condition="errors">
     <tal:block define="global body python:context.document_src({'raw':1})" />
     <td align="left" valign="middle" class="form-label">Errors</td>