[Zope-Checkins] SVN: Zope/trunk/ Merging
Andreas Jung
andreas at andreas-jung.com
Wed Dec 20 11:54:07 EST 2006
Log message for revision 71628:
Merging
/Zope/branches/ajung-zpt-encoding-fixes
This branch fixes several encoding issues in the ZopePageTemplate
implementation and some WebDAV issues, and now uses unicode internally
for ZPT instances (but not for the PageTemplate(File) classes).
Changed:
U Zope/trunk/doc/CHANGES.txt
U Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py
U Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py
U Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
A Zope/trunk/lib/python/Products/PageTemplates/utils.py
U Zope/trunk/lib/python/Products/PageTemplates/www/default.html
U Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt
-=-
Modified: Zope/trunk/doc/CHANGES.txt
===================================================================
--- Zope/trunk/doc/CHANGES.txt 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/doc/CHANGES.txt 2006-12-20 16:54:06 UTC (rev 71628)
@@ -39,6 +39,8 @@
until the late startup phase. This is in particular useful when running
Zope behind a loadbalancer (patch by Patrick Gerken).
+ - the ZopePageTemplate implementation now uses unicode internally.
+
Bugs Fixed
- Collector #2191: extended DateTime parser for better support
Modified: Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py 2006-12-20 16:54:06 UTC (rev 71628)
@@ -32,6 +32,14 @@
LOG = getLogger('PageTemplateFile')
def guess_type(filename, text):
+
+ # check for XML ourselves since guess_content_type can't
+ # detect text/xml if 'filename' doesn't end with .xml
+ # XXX: fix this in zope.contenttype
+
+ if text.startswith('<?xml'):
+ return 'text/xml'
+
content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'):
return content_type
Modified: Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py 2006-12-20 16:54:06 UTC (rev 71628)
@@ -40,20 +40,14 @@
from Products.PageTemplates.PageTemplateFile import guess_type
from Products.PageTemplates.Expressions import SecureModuleImporter
-# regular expression to extract the encoding from the XML preamble
-encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M)
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+
preferred_encodings = ['utf-8', 'iso-8859-15']
if os.environ.has_key('ZPT_PREFERRED_ENCODING'):
preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING'])
+
-def sniffEncoding(text, default_encoding='utf-8'):
- """Try to determine the encoding from html or xml"""
- if text.startswith('<?xml'):
- mo = encoding_reg.search(text)
- if mo:
- return mo.group(1)
- return default_encoding
class Src(Acquisition.Explicit):
""" I am scary code """
@@ -79,7 +73,6 @@
func_defaults = None
func_code = FuncCode((), 0)
- strict = False
_default_bindings = {'name_subpath': 'traverse_subpath'}
_default_content_fn = os.path.join(package_home(globals()),
@@ -108,22 +101,68 @@
security.declareProtected(view_management_screens,
'read', 'ZScriptHTML_tryForm')
- def __init__(self, id, text=None, content_type=None, encoding='utf-8',
- strict=False):
+ def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'):
self.id = id
self.expand = 0
- self.strict = strict
self.ZBindings_edit(self._default_bindings)
+ self.output_encoding = output_encoding
+
+ # default content
if not text:
text = open(self._default_content_fn).read()
- encoding = 'utf-8'
content_type = 'text/html'
- self.pt_edit(text, content_type, encoding)
+ self.pt_edit(text, content_type)
+
security.declareProtected(change_page_templates, 'pt_edit')
- def pt_edit(self, text, content_type, encoding='utf-8'):
+ def pt_edit(self, text, content_type, keep_output_encoding=False):
+
text = text.strip()
- if self.strict and not isinstance(text, unicode):
+
+ is_unicode = isinstance(text, unicode)
+ encoding = None
+ output_encoding = None
+
+ if content_type == 'text/xml':
+
+ if is_unicode:
+ encoding = None
+ output_encoding = 'utf-8'
+ else:
+ encoding = encodingFromXMLPreamble(text)
+ output_encoding = 'utf-8'
+
+
+ elif content_type == 'text/html':
+
+ charset = charsetFromMetaEquiv(text)
+
+ if is_unicode:
+
+ if charset:
+ encoding = None
+ output_encoding = charset
+ else:
+ encoding = None
+ output_encoding = 'iso-8859-15'
+
+ else:
+
+ if charset:
+ encoding = charset
+ output_encoding = charset
+ else:
+ encoding = 'iso-8859-15'
+ output_encoding = 'iso-8859-15'
+
+ else:
+ raise ValueError('Unsupported content-type %s' % content_type)
+
+ # for content updated through WebDAV, FTP
+ if not keep_output_encoding:
+ self.output_encoding = output_encoding
+
+ if not is_unicode:
text = unicode(text, encoding)
self.ZCacheable_invalidate()
@@ -137,16 +176,16 @@
source_dot_xml = Src()
security.declareProtected(change_page_templates, 'pt_editAction')
- def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand):
+ def pt_editAction(self, REQUEST, title, text, content_type, expand):
"""Change the title and document."""
if self.wl_isLocked():
raise ResourceLockedError("File is locked via WebDAV")
self.expand = expand
- self.pt_setTitle(title, encoding)
+ self.pt_setTitle(title, self.output_encoding)
- self.pt_edit(text, content_type, encoding)
+ self.pt_edit(text, content_type, True)
REQUEST.set('text', self.read()) # May not equal 'text'!
REQUEST.set('title', self.title)
message = "Saved changes."
@@ -157,7 +196,7 @@
security.declareProtected(change_page_templates, 'pt_setTitle')
def pt_setTitle(self, title, encoding='utf-8'):
- if self.strict and not isinstance(title, unicode):
+ if not isinstance(title, unicode):
title = unicode(title, encoding)
self._setPropValue('title', title)
@@ -186,8 +225,7 @@
if not content_type in ('text/html', 'text/xml'):
raise ValueError('Unsupported mimetype: %s' % content_type)
- encoding = sniffEncoding(text, encoding)
- self.pt_edit(text, content_type, encoding)
+ self.pt_edit(text, content_type)
return self.pt_editForm(manage_tabs_message='Saved changes')
security.declareProtected(change_page_templates, 'pt_changePrefs')
@@ -240,6 +278,8 @@
return c
def write(self, text):
+ if not isinstance(text, unicode):
+ raise TypeError("'text' parameter must be unicode")
self.ZCacheable_invalidate()
ZopePageTemplate.inheritedAttribute('write')(self, text)
@@ -291,8 +331,9 @@
""" Handle HTTP PUT requests """
self.dav__init(REQUEST, RESPONSE)
self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1)
- ## XXX this should be unicode or we must pass an encoding
- self.pt_edit(REQUEST.get('BODY', ''))
+ text = REQUEST.get('BODY', '')
+ content_type = guess_type('', text)
+ self.pt_edit(text, content_type)
RESPONSE.setStatus(204)
return RESPONSE
@@ -303,8 +344,8 @@
security.declareProtected(ftp_access, 'manage_FTPget')
def manage_FTPget(self):
"Get source for FTP download"
- self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type)
- return self.read()
+ result = self.pt_render()
+ return result.encode(self.output_encoding)
security.declareProtected(view_management_screens, 'html')
def html(self):
@@ -353,6 +394,12 @@
# acquisition context, so we don't know where it is. :-(
return None
+ def pt_render(self, source=False, extra_context={}):
+ result = PageTemplate.pt_render(self, source, extra_context)
+ assert isinstance(result, unicode)
+ return result
+
+
def wl_isLocked(self):
return 0
@@ -407,8 +454,8 @@
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
- encoding = sniffEncoding(text, encoding)
+
else:
if hasattr(text, 'read'):
filename = getattr(text, 'filename', '')
@@ -418,9 +465,14 @@
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
- encoding = sniffEncoding(text, encoding)
- zpt = ZopePageTemplate(id, text, content_type, encoding)
+ # ensure that we pass unicode to the constructor to
+ # avoid further hassles with pt_edit()
+
+ if not isinstance(text, unicode):
+ text = unicode(text, encoding)
+
+ zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding)
zpt.pt_setTitle(title, encoding)
self._setObject(id, zpt)
zpt = getattr(self, id)
Modified: Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,3 +1,5 @@
+# -*- encoding: iso-8859-15 -*-
+
"""ZopePageTemplate regression tests.
Ensures that adding a page template works correctly.
@@ -6,14 +8,163 @@
"""
-
import unittest
import Zope2
import transaction
import zope.component.testing
from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest
+from Testing.ZopeTestCase import ZopeTestCase, installProduct
+from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+
+ascii_str = '<html><body>hello world</body></html>'
+iso885915_str = '<html><body>üöäÜÖÄß</body></html>'
+utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
+
+xml_template = '''<?xml vesion="1.0" encoding="%s"?>
+<foo>
+üöäÜÖÄß
+</foo>
+'''
+
+xml_iso_8859_15 = xml_template % 'iso-8859-15'
+xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+html_template_w_header = '''
+<html>
+ <head>
+ <META http-equiv="content-type" content="text/html; charset=%s">
+ </hed>
+ <body>
+ test üöäÜÖÄß
+ </body>
+</html>
+'''
+
+html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
+html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+html_template_wo_header = '''
+<html>
+ <body>
+ test üöäÜÖÄß
+ </body>
+</html>
+'''
+
+html_iso_8859_15_wo_header = html_template_wo_header
+html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
+
+
+installProduct('PageTemplates')
+
+class ZPTUtilsTests(unittest.TestCase):
+
+ def testExtractEncodingFromXMLPreamble(self):
+ extract = encodingFromXMLPreamble
+ self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
+ self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
+
+ def testExtractCharsetFromMetaHTTPEquivTag(self):
+ extract = charsetFromMetaEquiv
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
+ self.assertEqual(extract('<html>...<html>'), None)
+
+
+class ZopePageTemplateFileTests(ZopeTestCase):
+
+ def testPT_RenderWithAscii(self):
+ manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii')
+ zpt = self.app['test']
+ result = zpt.pt_render()
+ # use startswith() because the renderer appends a trailing \n
+ self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+ def testPT_RenderWithISO885915(self):
+ manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15')
+ zpt = self.app['test']
+ result = zpt.pt_render()
+ # use startswith() because the renderer appends a trailing \n
+ self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+ def testPT_RenderWithUTF8(self):
+ manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+ zpt = self.app['test']
+ result = zpt.pt_render()
+ # use startswith() because the renderer appends a trailing \n
+ self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+ def testWriteAcceptsUnicode(self):
+ manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+ zpt = self.app['test']
+ s = u'this is unicode'
+ zpt.write(s)
+ self.assertEqual(zpt.read(), s)
+ self.assertEqual(isinstance(zpt.read(), unicode), True)
+
+ def testWriteWontAcceptsNonUnicode(self):
+ manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+ zpt = self.app['test']
+ self.assertRaises(TypeError, zpt.write, 'this is not unicode')
+
+
+ def _createZPT(self):
+ manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+ zpt = self.app['test']
+ return zpt
+
+ def _makePUTRequest(self, body):
+ return {'BODY' : body}
+
+ def _put(self, text):
+ zpt = self._createZPT()
+ REQUEST = self.app.REQUEST
+ REQUEST.set('BODY', text)
+ zpt.PUT(REQUEST, REQUEST.RESPONSE)
+ return zpt
+
+ def testPutHTMLIso8859_15WithCharsetInfo(self):
+ zpt = self._put(html_iso_8859_15_w_header)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+ self.assertEqual(zpt.content_type, 'text/html')
+
+ def testPutHTMLUTF8_WithCharsetInfo(self):
+ zpt = self._put(html_utf8_w_header)
+ self.assertEqual(zpt.output_encoding, 'utf-8')
+ self.assertEqual(zpt.content_type, 'text/html')
+
+ def testPutHTMLIso8859_15WithoutCharsetInfo(self):
+ zpt = self._put(html_iso_8859_15_wo_header)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+ self.assertEqual(zpt.content_type, 'text/html')
+
+ def testPutHTMLUTF8_WithoutCharsetInfo(self):
+ zpt = self._put(html_utf8_wo_header)
+ self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+ self.assertEqual(zpt.content_type, 'text/html')
+
+ def testPutXMLIso8859_15(self):
+ """ XML: use always UTF-8 als output encoding """
+ zpt = self._put(xml_iso_8859_15)
+ self.assertEqual(zpt.output_encoding, 'utf-8')
+ self.assertEqual(zpt.content_type, 'text/xml')
+
+ def testPutXMLUTF8(self):
+ """ XML: use always UTF-8 als output encoding """
+ zpt = self._put(xml_utf8)
+ self.assertEqual(zpt.output_encoding, 'utf-8')
+ self.assertEqual(zpt.content_type, 'text/xml')
+
class ZPTRegressions(unittest.TestCase):
def setUp(self):
@@ -58,14 +209,7 @@
pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text)
- def test_BBB_for_strict_attribute(self):
- # Collector 2213: old templates don't have 'strict' attribute.
- from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
- zpt = ZopePageTemplate('issue_2213')
- del zpt.strict # simulate old templates
- self.assertEqual(zpt.strict, False)
-
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
def setUp(self):
@@ -132,7 +276,9 @@
def test_suite():
suite = unittest.makeSuite(ZPTRegressions)
+ suite.addTests(unittest.makeSuite(ZPTUtilsTests))
suite.addTests(unittest.makeSuite(ZPTMacros))
+ suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
return suite
if __name__ == '__main__':
Copied: Zope/trunk/lib/python/Products/PageTemplates/utils.py (from rev 71627, Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py)
Modified: Zope/trunk/lib/python/Products/PageTemplates/www/default.html
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/www/default.html 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/www/default.html 2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,6 +1,7 @@
<html>
<head>
<title tal:content="template/title">The title</title>
+ <meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
Modified: Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt
===================================================================
--- Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt 2006-12-20 16:46:11 UTC (rev 71627)
+++ Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt 2006-12-20 16:54:06 UTC (rev 71628)
@@ -1,4 +1,4 @@
-<h1 tal:replace="structure python:context.manage_page_header(management_page_charset='utf-8')">Header</h1>
+<h1 tal:replace="structure python:context.manage_page_header(management_page_charset=context.output_encoding)">Header</h1>
<h2 tal:define="manage_tabs_message options/manage_tabs_message | nothing"
tal:replace="structure context/manage_tabs">Tabs</h2>
@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div>
</td>
- <td align="left" valign="top" colspan="2">
+ <td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br />
@@ -44,6 +44,17 @@
</td>
</tr>
+ <tr>
+ <td align="left" valign="middle">
+ <div class="form-label">Output encoding</div>
+ </td>
+ <td align="left" valign="middle">
+ <div class="form-text"
+ tal:content="context/output_encoding"
+ />
+ </td>
+ </tr>
+
<tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td>
More information about the Zope-Checkins
mailing list