[CMF-checkins] SVN: CMF/trunk/C Autodetect the encoding of UTF-8
text files with a suitable Byte Order Mark (0xEF 0xBB 0xBF).
Florent Guillaume
fg at nuxeo.com
Fri Jan 13 12:35:48 EST 2006
Log message for revision 41308:
Autodetect the encoding of UTF-8 text files with a suitable
Byte Order Mark (0xEF 0xBB 0xBF).
Changed:
U CMF/trunk/CHANGES.txt
U CMF/trunk/CMFCore/FSFile.py
A CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testUtf8.js
U CMF/trunk/CMFCore/tests/test_FSFile.py
-=-
Modified: CMF/trunk/CHANGES.txt
===================================================================
--- CMF/trunk/CHANGES.txt 2006-01-13 17:08:27 UTC (rev 41307)
+++ CMF/trunk/CHANGES.txt 2006-01-13 17:35:47 UTC (rev 41308)
@@ -2,6 +2,9 @@
New Features
+ - FSFile: autodetect the encoding of UTF-8 text files with a
+ suitable Byte Order Mark (0xEF 0xBB 0xBF).
+
- CMFDefault.MetadataTool: support arbitrary additional schemas.
The "stock" DublinCore-specific API is still accessible, implemented
via a special "DCMI" subobject.
Modified: CMF/trunk/CMFCore/FSFile.py
===================================================================
--- CMF/trunk/CMFCore/FSFile.py 2006-01-13 17:08:27 UTC (rev 41307)
+++ CMF/trunk/CMFCore/FSFile.py 2006-01-13 17:35:47 UTC (rev 41308)
@@ -15,6 +15,7 @@
$Id$
"""
+import codecs
import Globals
from AccessControl import ClassSecurityInfo
from DateTime import DateTime
@@ -79,6 +80,11 @@
body = body.data
content_type, enc=guess_content_type(
getattr(file, 'filename',id), body, content_type)
+ if (enc is None
+ and (content_type.startswith('text/') or
+ content_type.startswith('application/'))
+ and body.startswith(codecs.BOM_UTF8)):
+ content_type += '; charset=utf-8'
return content_type
Added: CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testUtf8.js
===================================================================
--- CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testUtf8.js 2006-01-13 17:08:27 UTC (rev 41307)
+++ CMF/trunk/CMFCore/tests/fake_skins/fake_skin/testUtf8.js 2006-01-13 17:35:47 UTC (rev 41308)
@@ -0,0 +1,4 @@
+/*
+ * This file starts with an UTF-8 BOM: 0xEF 0xBB 0xBF
+*/
+var drink = "café";
Modified: CMF/trunk/CMFCore/tests/test_FSFile.py
===================================================================
--- CMF/trunk/CMFCore/tests/test_FSFile.py 2006-01-13 17:08:27 UTC (rev 41307)
+++ CMF/trunk/CMFCore/tests/test_FSFile.py 2006-01-13 17:35:47 UTC (rev 41308)
@@ -178,7 +178,15 @@
self.assertEqual( self.RESPONSE.getHeader( 'Last-Modified'.lower() )
, rfc1123_date( mod_time ) )
+ def test_utf8charset_detection( self ):
+ file_name = 'testUtf8.js'
+ file = self._makeOne(file_name, file_name)
+ file = file.__of__(self.root)
+ data = file.index_html(self.REQUEST, self.RESPONSE)
+ self.assertEqual(self.RESPONSE.getHeader('content-type'),
+ 'application/x-javascript; charset=utf-8')
+
def test_suite():
return unittest.TestSuite((
unittest.makeSuite(FSFileTests),
More information about the CMF-checkins
mailing list