[Zope-Checkins] CVS: Zope3/lib/python/Zope/Publisher/Browser - BrowserRequest.py:1.6 BrowserResponse.py:1.3

Stephan Richter srichter@cbu.edu
Fri, 14 Jun 2002 12:50:52 -0400


Update of /cvs-repository/Zope3/lib/python/Zope/Publisher/Browser
In directory cvs.zope.org:/tmp/cvs-serv18258/lib/python/Zope/Publisher/Browser

Modified Files:
	BrowserRequest.py BrowserResponse.py 
Log Message:
Finished Zope 3 Unicode support. Zope 3 should now be able to handle all 
unicode formats. Note that encoding and decoding unicode strings are based
on the parameters we get from the browser in HTTP_ACCEPT_CHARSET.

All "text strings" (strings that represent human language text) internally 
in Zope 3 should be unicode strings from now on! I have not checked all of 
Zope 3, so if you see a non-unicode text string somewhere, please convert
it simply by putting 'u' before the string literal.

Note that binary data, such as images, is not to be encoded.

The encoding happens on the HTTPRequest/HTTPResponse abstraction level. 
That means that currently FTP does not profit from this new code; however,
FTP is always data anyhow.


=== Zope3/lib/python/Zope/Publisher/Browser/BrowserRequest.py 1.5 => 1.6 ===
 from cgi_names import isCGI_NAME, hide_key
 from types import StringType
+
+from Zope.ComponentArchitecture import getAdapter
+
 from Zope.Publisher.Converters import get_converter
 from Zope.Publisher.HTTP.HTTPRequest import HTTPRequest
-from IBrowserPresentation import IBrowserPresentation
+from Zope.I18n.IUserPreferredCharsets import IUserPreferredCharsets
 
+from IBrowserPresentation import IBrowserPresentation
 from IBrowserRequest import IBrowserRequest
 from IBrowserPublication import IBrowserPublication
 from IBrowserApplicationRequest import IBrowserApplicationRequest
 from BrowserResponse import BrowserResponse
 
-# Default Encoding
-ENCODING = 'UTF-8'
-
 # Flas Constants
 SEQUENCE = 1
 DEFAULT = 2
@@ -55,6 +56,7 @@
 
     __slots__ = (
         'form',   # Form data
+        'charsets', # helper attribute
         )
 
     use_redirect = 0 # Set this to 1 in a subclass to redirect GET
@@ -68,6 +70,7 @@
 
     def __init__(self, body_instream, outstream, environ, response=None):
         self.form = {}
+        self.charsets = None
         super(BrowserRequest, self).__init__(
             body_instream, outstream, environ, response)
 
@@ -76,7 +79,18 @@
         # Should be overridden by subclasses
         return BrowserResponse(outstream)
 
-    
+    def _decode(self, text):
+        """Try to decode the text using one of the available charsets.""" 
+        if self.charsets is None:
+            envadaptor = getAdapter(self, IUserPreferredCharsets)
+            self.charsets = envadaptor.getPreferredCharsets()
+        for charset in self.charsets:
+            try:
+                text = unicode(text, charset)
+                break
+            except UnicodeError:
+                pass
+        return text
 
     ######################################
     # from: Zope.Publisher.IPublisherRequest.IPublisherRequest
@@ -188,9 +202,9 @@
                     continue
                 
                 # Make it unicode
-                key = unicode(key, ENCODING)
+                key = self._decode(key)
                 if type(item) == StringType:
-                    item = unicode(item, ENCODING)
+                    item = self._decode(item)
 
                 if flags:
 
@@ -588,4 +602,3 @@
         return ', '.join(
             map(lambda item: "%s: %s" % (item[0], repr(item[1])), L1))
     
-


=== Zope3/lib/python/Zope/Publisher/Browser/BrowserResponse.py 1.2 => 1.3 ===
 
 import sys, re
-from types import StringType, ClassType
+from types import StringTypes, UnicodeType, ClassType
 from cgi import escape
 
 from Zope.Publisher.HTTP.HTTPResponse import HTTPResponse
@@ -43,20 +43,16 @@
         updates the "content-length" return header and sets the status to
         200 if it has not already been set.
         """
-        body = str(body)
+
+        if type(body) not in StringTypes:
+            body = unicode(body)
 
         if not ('content-type' in self._headers):
             c = (self.__isHTML(body) and 'text/html' or 'text/plain')
+            if self._charset is not None:
+                c += ';charset='+self._charset
             self.setHeader('content-type', c)
 
-        content_type = self._headers['content-type']
-        if is_text_html(content_type):
-            # Some browsers interpret certain characters in Latin 1 as html
-            # special characters. These cannot be removed by html_quote,
-            # because this is not the case for all encodings.
-            body = body.replace('\213', '<')
-            body = body.replace('\233', '>')
-
         body = self.__insertBase(body)
         self._body = body
         self._updateContentLength()
@@ -103,12 +99,12 @@
         self._base = base
 
 
-
-latin1_alias_match = re.compile(
-    r'text/html(\s*;\s*charset=((latin)|(latin[-_]?1)|'
-    r'(cp1252)|(cp819)|(csISOLatin1)|(IBM819)|(iso-ir-100)|'
-    r'(iso[-_]8859[-_]1(:1987)?)))?$',re.I).match
+#latin1_alias_match = re.compile(
+#    r'text/html(\s*;\s*charset=((latin)|(latin[-_]?1)|'
+#    r'(cp1252)|(cp819)|(csISOLatin1)|(IBM819)|(iso-ir-100)|'
+#    r'(iso[-_]8859[-_]1(:1987)?)))?$',re.I).match
 
 def is_text_html(content_type):
-    return (content_type == 'text/html' or
-            latin1_alias_match(content_type) is not None)
+    return content_type.startswith('text/html') # or
+#    return (content_type == 'text/html' or
+#            latin1_alias_match(content_type) is not None)