[Zope3-checkins]
SVN: Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/
Simple HTML Fragments encoding sniffer
Julien Anguenot
ja at nuxeo.com
Sat Oct 15 01:03:49 EDT 2005
Log message for revision 39460:
Simple HTML Fragments encoding sniffer
Changed:
A Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/encodingsniffer.py
A Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/tests/test_encodingsniffer.py
-=-
Added: Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/encodingsniffer.py
===================================================================
--- Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/encodingsniffer.py 2005-10-15 03:42:02 UTC (rev 39459)
+++ Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/encodingsniffer.py 2005-10-15 05:03:48 UTC (rev 39460)
@@ -0,0 +1,58 @@
+##############################################################################
+#
+# Copyright (c) 2005 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""HTML fragment encoding sniffer
+
+$Id$
+"""
+
+from HTMLParser import HTMLParser
+from HTMLParser import HTMLParseError
+
+class EncodingFound(Exception):
+    """Raised by the parser when a meta tag carrying a charset is found.
+
+    The ``value`` attribute holds the charset string.
+    """
+
+    def __init__(self, value):
+        # Initialise the base class as well so that ``args`` and
+        # ``str()`` carry the charset instead of being empty.
+        Exception.__init__(self, value)
+        self.value = value
+
+class EncodingParser(HTMLParser):
+    """Parser that looks for a charset declaration in an HTML fragment.
+
+    Feeding data raises EncodingFound as soon as a meta tag whose
+    content attribute carries a non-empty charset parameter is seen.
+    """
+
+    def handle_starttag(self, tag, attrs):
+        # Called by HTMLParser for every start tag.  Tag and attribute
+        # names arrive lowercased; attribute values are left untouched.
+        if tag != 'meta':
+            return
+        for attr, value in attrs:
+            # A valueless attribute (<meta content>) is reported with
+            # a value of None, which must not reach the string methods.
+            if attr != 'content' or not value:
+                continue
+            # Inspect every ';'-separated parameter rather than
+            # assuming charset is the second one, and match its name
+            # regardless of case ("Charset=", "CHARSET=").
+            for param in value.split(';'):
+                pieces = param.split('=', 1)
+                if len(pieces) != 2:
+                    continue
+                if pieces[0].strip().lower() != 'charset':
+                    continue
+                charset = pieces[1].strip()
+                if charset:
+                    raise EncodingFound(charset)
+
+def sniff_encoding(data):
+    """Return the charset declared by a meta tag in an HTML fragment.
+
+    The fragment is fed to an EncodingParser.  The charset is returned
+    with surrounding whitespace stripped.  An empty string is returned
+    when no charset is found or when the parser raises HTMLParseError.
+    """
+    encoding = ''
+    sniffer = EncodingParser()
+    try:
+        sniffer.feed(data)
+    except HTMLParseError:
+        # Markup the parser rejects is treated like "no charset found".
+        pass
+    except EncodingFound, found:
+        encoding = found.value.strip()
+    return encoding
Property changes on: Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/encodingsniffer.py
___________________________________________________________________
Name: svn:keywords
+ Id
Added: Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/tests/test_encodingsniffer.py
===================================================================
--- Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/tests/test_encodingsniffer.py 2005-10-15 03:42:02 UTC (rev 39459)
+++ Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/tests/test_encodingsniffer.py 2005-10-15 05:03:48 UTC (rev 39460)
@@ -0,0 +1,48 @@
+##############################################################################
+#
+# Copyright (c) 2005 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Test the HTML fragment encoding sniffer
+
+$Id$
+"""
+
+import sys
+import unittest
+
+from zope.tal.encodingsniffer import sniff_encoding
+
+class EncodingSnifferTestCase(unittest.TestCase):
+    # Checks sniff_encoding() against small HTML fragments, with and
+    # without a charset declared in a meta tag.
+
+    # Paragraph appended to every fragment; it contains non-ASCII text
+    # but no meta tag.
+    _paragraph = """<p tal:content="python:u'déjà-vu'">para</p>"""
+
+    def _fragment(self, content):
+        # Build a meta tag with the given content attribute, followed
+        # by the shared paragraph.
+        meta = "<meta http-equiv='Content-type' content='%s'>" % content
+        return meta + self._paragraph
+
+    def test_ascii_html_fragment_no_encoding(self):
+        # Without a meta tag nothing can be sniffed.
+        self.failIf(sniff_encoding(self._paragraph))
+
+    def test_ascii_html_fragment_with_encoding(self):
+        fragment = self._fragment('text/html; charset=ISO-8859-15')
+        self.assertEqual(sniff_encoding(fragment), 'ISO-8859-15')
+
+    def test_ascii_html_fragment_with_encoding_and_whispace(self):
+        # Whitespace around the charset value must be dropped.
+        fragment = self._fragment('text/html; charset= ISO-8859-15 ')
+        self.assertEqual(sniff_encoding(fragment), 'ISO-8859-15')
+
+    def test_ascii_html_fragment_with_encoding_and_ligne_break(self):
+        # A line break between the parameters must not matter.
+        fragment = self._fragment('text/html;\n charset= ISO-8859-15 ')
+        self.assertEqual(sniff_encoding(fragment), 'ISO-8859-15')
+
+def test_suite():
+    # Gather the test methods of EncodingSnifferTestCase into one suite.
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(EncodingSnifferTestCase))
+    return suite
+
+if __name__ == '__main__':
+    # Only the unittest module is imported, so the runner has to be
+    # reached through it; a bare main() raises NameError.
+    unittest.main(defaultTest='test_suite')
Property changes on: Zope3/branches/fdrake-anguenot_better_xml_support_for_pt/src/zope/tal/tests/test_encodingsniffer.py
___________________________________________________________________
Name: svn:keywords
+ Id
More information about the Zope3-Checkins
mailing list