[Zope-Checkins]
SVN: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/
refactored code to extract the encoding from the XML preamble
and the charset from a <meta http-equiv..> tag
Andreas Jung
andreas at andreas-jung.com
Mon Dec 18 04:30:10 EST 2006
Log message for revision 71585:
refactored code to extract the encoding from the XML preamble and the charset
from a <meta http-equiv..> tag
Changed:
U Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
A Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py
-=-
Modified: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
===================================================================
--- Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-18 09:20:36 UTC (rev 71584)
+++ Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-18 09:30:09 UTC (rev 71585)
@@ -16,6 +16,7 @@
from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
ascii_str = '<html><body>hello world</body></html>'
@@ -59,7 +60,24 @@
installProduct('PageTemplates')
+class ZPTUtilsTests(unittest.TestCase):
+ def testExtractEncodingFromXMLPreamble(self):
+ extract = encodingFromXMLPreamble
+ self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
+ self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
+ self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
+
+ def testExtractCharsetFromMetaHTTPEquivTag(self):
+ extract = charsetFromMetaEquiv
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
+ self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
+ self.assertEqual(extract('<html>...<html>'), None)
+
+
class ZopePageTemplateFileTests(ZopeTestCase):
def testPT_RenderWithAscii(self):
@@ -67,7 +85,7 @@
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
- self.assertEqual(result.startswith(ascii_str), True)
+ self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithISO885915(self):
@@ -75,15 +93,16 @@
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
- self.assertEqual(result.startswith(iso885915_str), True)
+ self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithUTF8(self):
+ import pdb; pdb.set_trace()
manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
- self.assertEqual(result.startswith(utf8_str), True)
+ self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def _createZPT(self):
@@ -243,9 +262,11 @@
def test_suite():
- suite = unittest.makeSuite(ZPTRegressions)
- suite.addTests(unittest.makeSuite(ZPTMacros))
- suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
+# suite = unittest.makeSuite(ZPTRegressions)
+ suite = unittest.makeSuite(ZPTUtilsTests)
+# suite.addTests(unittest.makeSuite(ZPTUtilsTests))
+# suite.addTests(unittest.makeSuite(ZPTMacros))
+# suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
return suite
if __name__ == '__main__':
Added: Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py
===================================================================
--- Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py 2006-12-18 09:20:36 UTC (rev 71584)
+++ Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py 2006-12-18 09:30:09 UTC (rev 71585)
@@ -0,0 +1,59 @@
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+""" Some helper methods
+
+$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
+"""
+
+import re
+
+
+xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
+http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
+http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
+
+def encodingFromXMLPreamble(xml):
+ """ Extract the encoding from a xml preamble.
+ Return 'utf-8' if not available
+ """
+
+ mo = xml_preamble_reg.match(xml)
+
+ if not mo:
+ return 'utf-8'
+ else:
+ return mo.group(1).lower()
+
+
+def charsetFromMetaEquiv(html):
+ """ Return the value of the 'charset' from a html document
+ containing <meta http-equiv="content-type" content="text/html; charset=utf8>.
+ Returns None, if not available.
+ """
+
+ # first check for the <meta...> tag
+ mo = http_equiv_reg.search(html)
+ if mo:
+ # extract the meta tag
+ meta = mo.group(1)
+
+ # search for the charset value
+ mo = http_equiv_reg2.search(meta)
+ if mo:
+ # return charset
+ return mo.group(1).lower()
+
+ return None
+
+
More information about the Zope-Checkins
mailing list