[Checkins] SVN: Products.CMFCore/trunk/Products/CMFCore/ - FSPageTemplate: Change the charset/encoding detection to consider
Jens Vagelpohl
jens at dataflake.org
Sun May 24 04:49:42 EDT 2009
Log message for revision 100323:
- FSPageTemplate: Change the charset/encoding detection to consider
charset specifications in the content type, and replace the
hardcoded ISO-8859-15 (Latin-9) fallback with the mechanism used by the
Products.PageTemplates code, which can be influenced by setting
the environment variable "ZPT_PREFERRED_ENCODING"
(https://bugs.launchpad.net/zope-cmf/+bug/322263)
Changed:
U Products.CMFCore/trunk/Products/CMFCore/CHANGES.txt
U Products.CMFCore/trunk/Products/CMFCore/FSPageTemplate.py
A Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt
A Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt.metadata
U Products.CMFCore/trunk/Products/CMFCore/tests/test_FSPageTemplate.py
-=-
Modified: Products.CMFCore/trunk/Products/CMFCore/CHANGES.txt
===================================================================
--- Products.CMFCore/trunk/Products/CMFCore/CHANGES.txt 2009-05-24 08:36:22 UTC (rev 100322)
+++ Products.CMFCore/trunk/Products/CMFCore/CHANGES.txt 2009-05-24 08:49:42 UTC (rev 100323)
@@ -4,6 +4,13 @@
2.2.0 (unreleased)
------------------
+- FSPageTemplate: Change the charset/encoding detection to consider
+ charset specifications in the content type, and replace the
+ hardcoded Latin-15 fallback with the mechanism used by the
+ Products.PageTemplate code, which can be influenced by setting
+ the environment variable "ZPT_PREFERRED_ENCODING"
+ (https://bugs.launchpad.net/zope-cmf/+bug/322263)
+
- Expose the ZMI manage view of the `_components ` object manager as
a new "Components Folder" tab in the ZMI.
Modified: Products.CMFCore/trunk/Products/CMFCore/FSPageTemplate.py
===================================================================
--- Products.CMFCore/trunk/Products/CMFCore/FSPageTemplate.py 2009-05-24 08:36:22 UTC (rev 100322)
+++ Products.CMFCore/trunk/Products/CMFCore/FSPageTemplate.py 2009-05-24 08:49:42 UTC (rev 100323)
@@ -22,10 +22,11 @@
from App.class_init import InitializeClass
from App.special_dtml import DTMLFile
from Products.PageTemplates.PageTemplate import PageTemplate
+from Products.PageTemplates.utils import charsetFromMetaEquiv
from Products.PageTemplates.utils import encodingFromXMLPreamble
-from Products.PageTemplates.utils import charsetFromMetaEquiv
+from Products.PageTemplates.ZopePageTemplate import preferred_encodings
+from Products.PageTemplates.ZopePageTemplate import Src
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
-from Products.PageTemplates.ZopePageTemplate import Src
from Shared.DC.Scripts.Script import Script
from Products.CMFCore.DirectoryView import registerFileExtension
@@ -39,8 +40,8 @@
from Products.CMFCore.utils import _setCacheHeaders
-
xml_detect_re = re.compile('^\s*<\?xml\s+(?:[^>]*?encoding=["\']([^"\'>]+))?')
+charset_re = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
_marker = object()
@@ -94,8 +95,10 @@
# type is initialized as text/html by default, so we only
# attempt further detection if the default is encountered.
# One previous misbehavior remains: It is not possible to
- # force a text./html type if parsing detects it as XML.
+ # force a text/html type if parsing detects it as XML.
encoding = None
+ preferred = preferred_encodings[:]
+
if getattr(self, 'content_type', 'text/html') == 'text/html':
xml_info = xml_detect_re.match(data)
if xml_info:
@@ -104,24 +107,42 @@
encoding = xml_info.group(1) or 'utf-8'
self.content_type = 'text/xml; charset=%s' % encoding
+ if not isinstance(data, unicode):
+ if encoding is None:
+ charset = getattr(self, 'charset', None)
- if encoding is None:
- charset = getattr(self, 'charset', None)
- if charset is None:
- if self.content_type.startswith('text/html'):
- charset = charsetFromMetaEquiv(data) or 'iso-8859-15'
- elif self.content_type.startswith('text/xml'):
- charset = encodingFromXMLPreamble(data)
- else:
- raise ValueError('Unsupported content-type: %s'
- % self.content_type)
+ if charset is None:
+ if self.content_type.startswith('text/html'):
+ mo = charset_re.search(self.content_type)
+ if mo:
+ charset = mo.group(1).lower()
- if not isinstance(data, unicode):
- data = unicode(data, charset)
- else:
- if not isinstance(data, unicode):
- data = unicode(data, encoding)
+ if charset is None:
+ charset = charsetFromMetaEquiv(data)
+
+ elif self.content_type.startswith('text/xml'):
+ charset = encodingFromXMLPreamble(data)
+ else:
+ raise ValueError('Unsupported content_type: %s'
+ % self.content_type)
+
+ if charset is not None:
+ preferred.insert(0, charset)
+
+ else:
+ preferred.insert(0, encoding)
+
+ for enc in preferred:
+ try:
+ data = unicode(data, enc)
+ if isinstance(data, unicode):
+ break
+ except UnicodeDecodeError:
+ continue
+ else:
+ data = unicode(data)
+
self.write(data)
Added: Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt
===================================================================
(Binary files differ)
Property changes on: Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt.metadata
===================================================================
--- Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt.metadata (rev 0)
+++ Products.CMFCore/trunk/Products/CMFCore/tests/fake_skins/fake_skin/testPT5.pt.metadata 2009-05-24 08:49:42 UTC (rev 100323)
@@ -0,0 +1,2 @@
+[default]
+content_type=text/html; charset=utf-16
Modified: Products.CMFCore/trunk/Products/CMFCore/tests/test_FSPageTemplate.py
===================================================================
--- Products.CMFCore/trunk/Products/CMFCore/tests/test_FSPageTemplate.py 2009-05-24 08:36:22 UTC (rev 100322)
+++ Products.CMFCore/trunk/Products/CMFCore/tests/test_FSPageTemplate.py 2009-05-24 08:49:42 UTC (rev 100323)
@@ -96,6 +96,7 @@
self.assertEqual( self.RESPONSE.getHeader('content-type')
, 'text/xml'
)
+
def test_CharsetFromFSMetadata(self):
# testPT3 is an UTF-16 encoded file (see its .metadatafile)
# is respected
@@ -114,6 +115,13 @@
self.failUnless(u'123üöäß' in data)
self.assertEqual(script.content_type, 'text/html')
+ def test_CharsetFromContentTypeMetadata(self):
+ script = self._makeOne('testPT5', 'testPT5.pt')
+ script = script.__of__(self.root)
+ data = script.read()
+ self.failUnless(u'123üöäß' in data)
+ self.assertEqual(script.content_type, 'text/html; charset=utf-16')
+
def test_BadCall( self ):
script = self._makeOne( 'testPTbad', 'testPTbad.pt' )
script = script.__of__(self.root)
More information about the Checkins
mailing list