[Zope-Checkins] CVS: Zope/lib/python/DocumentTemplate - ustr.py:1.2 DT_Util.py:1.86 DT_Var.py:1.52 cDocumentTemplate.c:1.44 html_quote.py:1.2 pDocumentTemplate.py:1.35

Toby Dickenson tdickenson@geminidataloggers.com
Wed, 27 Mar 2002 05:14:33 -0500


Update of /cvs-repository/Zope/lib/python/DocumentTemplate
In directory cvs.zope.org:/tmp/cvs-serv10497/lib/python/DocumentTemplate

Modified Files:
	DT_Util.py DT_Var.py cDocumentTemplate.c html_quote.py 
	pDocumentTemplate.py 
Added Files:
	ustr.py 
Log Message:
merged toby-stiff-cache-branch and toby-unicode-branch

=== Zope/lib/python/DocumentTemplate/ustr.py 1.1 => 1.2 ===
+
+# The plain function behind Exception.__str__.  ustr() compares the im_func
+# of an object's bound __str__ against this to recognise objects that use
+# the stock Exception.__str__.
+nasty_exception_str = Exception.__str__.im_func
+
+def ustr(v):
+    """Convert an object to a plain string or a unicode string.
+
+    Returns ``v`` unchanged when its type is exactly one of the two string
+    types.  Otherwise, if the object has a ``__str__`` attribute, that is
+    called directly and its result is returned; a result that is not one
+    of the two string types raises ``ValueError``.  Objects whose
+    ``__str__`` is the stock ``Exception.__str__`` are handed to
+    ``_exception_str`` instead.  Objects without a ``__str__`` attribute
+    fall back to ``str(v)``.
+    """
+    string_types = (StringType,UnicodeType)
+    if type(v) in string_types:
+        return v
+    else:
+        fn = getattr(v,'__str__',None)
+        if fn is not None:
+            # An object that wants to present its own string representation,
+            # but we don't know what type of string. We can't use any built-in
+            # function like str() or unicode() to retrieve it because
+            # they all constrain the type, which potentially raises an
+            # exception. To avoid exceptions we have to call __str__ directly.
+            if getattr(fn,'im_func',None)==nasty_exception_str:
+                # Exception objects have been optimised into C, and their
+                # __str__ function fails when given a unicode object.
+                # Unfortunately this scenario is all too common when
+                # migrating to unicode, because of code which does:
+                # raise ValueError(something_I_wasnt_expecting_to_be_unicode)
+                return _exception_str(v)
+            else:
+                # Trust the object to do this right, but still verify that
+                # what came back really is one of the two string types.
+                v = fn()
+                if type(v) in string_types:
+                    return v
+                else:
+                    raise ValueError('__str__ returned wrong type')
+        # Drop through for non-instance types, and instances that
+        # do not define a special __str__
+        return str(v)
+
+
+def _exception_str(self):
+    """Build the string form of an exception-like object from ``self.args``.
+
+    Returns '' when ``args`` is empty, ``ustr()`` of the sole argument when
+    there is exactly one (so a unicode argument stays unicode), and
+    ``str(self.args)`` when there are several.
+    """
+    if not self.args:
+        return ''
+    elif len(self.args) == 1:
+        return ustr(self.args[0])
+    else:
+        return str(self.args)


=== Zope/lib/python/DocumentTemplate/DT_Util.py 1.85 => 1.86 ===
 
 import re, os
-from html_quote import html_quote # for import by other modules, dont remove!
+from html_quote import html_quote, ustr # for import by other modules, dont remove!
 from RestrictedPython.Guards import safe_builtins
 from RestrictedPython.Utilities import utility_builtins
 from RestrictedPython.Eval import RestrictionCapableEval
@@ -41,10 +41,9 @@
 try:
     import ExtensionClass
     from cDocumentTemplate import InstanceDict, TemplateDict, \
-         render_blocks, safe_callable
+         render_blocks, safe_callable, join_unicode
 except: from pDocumentTemplate import InstanceDict, TemplateDict, \
-        render_blocks, safe_callable
-
+        render_blocks, safe_callable, join_unicode
 
 functype = type(int_param)
 class NotBindable:


=== Zope/lib/python/DocumentTemplate/DT_Var.py 1.51 => 1.52 ===
 __version__='$Revision$'[11:-2]
 
-from DT_Util import parse_params, name_param, str
+from DT_Util import parse_params, name_param, str, ustr
 import os, string, re,  sys
 from urllib import quote, quote_plus
 from cgi import escape
@@ -253,7 +253,7 @@
 
         # finally, pump it through the actual string format...
         fmt=self.fmt
-        if fmt=='s': val=str(val)
+        if fmt=='s': val=ustr(val)
         else: val = ('%'+self.fmt) % (val,)
 
         # next, look for upper, lower, etc


=== Zope/lib/python/DocumentTemplate/cDocumentTemplate.c 1.43 => 1.44 ===
 static PyObject *py_Unauthorized_fmt, *py_guarded_getattr;
 static PyObject *py__push, *py__pop, *py_aq_base, *py_renderNS;
-static PyObject *py___class__, *html_quote;
+static PyObject *py___class__, *html_quote, *ustr;
 
 /* ----------------------------------------------------- */
 
@@ -42,6 +42,8 @@
 
 staticforward PyExtensionClass InstanceDictType;
 
+staticforward PyObject *_join_unicode(PyObject *prejoin);
+
 static PyObject *
 InstanceDict___init__(InstanceDictobject *self, PyObject *args)
 {
@@ -685,22 +687,43 @@
 	      if (PyString_Check(t)) t=PyObject_GetItem(md, t);
 	      else t=PyObject_CallObject(t, mda);
 
-              if (t == NULL || (! PyString_Check(t)))
+              if (t == NULL) return -1;
+
+              if (! ( PyString_Check(t) || PyUnicode_Check(t) ) )
                 {
-                  if (t) ASSIGN(t, PyObject_Str(t));
+                  ASSIGN(t, PyObject_CallFunction(ustr, "O", t));
                   UNLESS(t) return -1;
                 }
 
-              if (PyString_Check(t) 
-                  && PyTuple_GET_SIZE(block) == 3) /* html_quote */
+              if (PyTuple_GET_SIZE(block) == 3) /* html_quote */
                 {
-                  if (strchr(PyString_AS_STRING(t), '&')
-                      || strchr(PyString_AS_STRING(t), '<')
-                      || strchr(PyString_AS_STRING(t), '>')
-                      || strchr(PyString_AS_STRING(t), '"')
-                      )
-                    ASSIGN(t, PyObject_CallFunction(html_quote, "O", t));
-		    if (t == NULL) return -1;
+                  int skip_html_quote;
+                  if (PyString_Check(t))
+                    {
+                      if (strchr(PyString_AS_STRING(t), '&') ||
+                          strchr(PyString_AS_STRING(t), '<') ||
+                          strchr(PyString_AS_STRING(t), '>') ||
+                          strchr(PyString_AS_STRING(t), '"')     )
+                        {
+                          /* string includes html problem characters, so
+                             we cant skip the quoting process */
+                          skip_html_quote = 0;
+                        }
+                      else
+                        {
+                          skip_html_quote = 1;
+                        }
+                    }
+                  else
+                    {
+                      /* never skip the quoting for unicode strings */
+                      skip_html_quote = 0;
+                    }
+                  if (!skip_html_quote)
+                    {
+                      ASSIGN(t, PyObject_CallFunction(html_quote, "O", t));
+                      if (t == NULL) return -1;
+                    }
                 }
                   
               block = t;
@@ -787,7 +810,7 @@
               return -1;
             }
         }
-      else if (PyString_Check(block))
+      else if (PyString_Check(block) || PyUnicode_Check(block))
 	{
 	  Py_INCREF(block);
 	}
@@ -830,7 +853,7 @@
   else if (l==1)
     ASSIGN(rendered, PySequence_GetItem(rendered,0));
   else
-    ASSIGN(rendered, PyObject_CallFunction(join,"OO",rendered,py_));
+    ASSIGN(rendered, _join_unicode(rendered));
 
   return rendered;
 
@@ -852,11 +875,65 @@
     return PyInt_FromLong(1);
   else
     return PyInt_FromLong(0);
-}  
+}
+
+static PyObject *
+_join_unicode(PyObject *prejoin)
+{   /* Join `prejoin`; if that raises UnicodeError, retry with plain strings decoded as Latin-1. */
+    PyObject *joined;
+    joined = PyObject_CallFunction(join,"OO",prejoin,py_);  /* `join` and `py_` are defined outside this hunk */
+    if(!joined && PyErr_ExceptionMatches(PyExc_UnicodeError))
+    {
+        int i,l;
+        PyObject *list;
+        PyErr_Clear();  /* discard the UnicodeError before retrying */
+        list = PySequence_List(prejoin);  /* new list, so items can be replaced without touching `prejoin` */
+        if(!list)
+        {
+            return NULL;
+        }
+        l = PyList_Size(list);
+        for(i=0;i<l;++i)
+        {
+            PyObject *item = PyList_GetItem(list,i);  /* borrowed reference */
+            if(PyString_Check(item))
+            {
+                PyObject *unicode = PyUnicode_DecodeLatin1(PyString_AsString(item),PyString_Size(item),NULL);
+                if(unicode)
+                {
+                    PyList_SetItem(list,i,unicode);  /* steals the reference to `unicode` */
+                }
+                else
+                {
+                    Py_DECREF(list);  /* decoding failed: drop the copy and propagate the error */
+                    return NULL;
+                }
+           }
+       }
+       joined = PyObject_CallFunction(join,"OO",list,py_);  /* second attempt, on the converted copy */
+       Py_DECREF(list);
+    }
+    return joined;  /* NULL here means an error is still set */
+}
+
+static PyObject *
+join_unicode(PyObject *self, PyObject *args)
+{ /* Python-callable wrapper: unpack the single argument and pass it to _join_unicode(). */
+  PyObject *ob;
+
+  UNLESS(PyArg_ParseTuple(args,"O", &ob)) return NULL;
+  return _join_unicode(ob);
+}
+
 
 static struct PyMethodDef Module_Level__methods[] = {
   {"render_blocks", (PyCFunction)render_blocks,	METH_VARARGS,
    ""},
+  {"join_unicode", (PyCFunction)join_unicode,	METH_VARARGS,
+   "join a list of plain strings into a single plain string,"
+   "a list of unicode strings into a single unicode strings,"
+   "or a list containing a mix into a single unicode string with"
+   "the plain strings converted from latin-1"},
   {"safe_callable", (PyCFunction)safe_callable,	METH_VARARGS,
    "callable() with a workaround for a problem with ExtensionClasses\n"
    "and __call__()."},
@@ -871,6 +948,8 @@
   DictInstanceType.ob_type=&PyType_Type;
 
   UNLESS (html_quote = PyImport_ImportModule("html_quote")) return;
+  ASSIGN(ustr, PyObject_GetAttrString(html_quote, "ustr"));
+  UNLESS (ustr) return;
   ASSIGN(html_quote, PyObject_GetAttrString(html_quote, "html_quote"));
   UNLESS (html_quote) return;
 


=== Zope/lib/python/DocumentTemplate/html_quote.py 1.1 => 1.2 ===
 
 from cgi import escape
+from ustr import ustr
 
 def html_quote(v, name='(Unknown name)', md={}):
-    return escape(str(v), 1)
+    return escape(ustr(v), 1)
 


=== Zope/lib/python/DocumentTemplate/pDocumentTemplate.py 1.34 => 1.35 ===
 
 StringType=type('')
+UnicodeType=type(u'')
 TupleType=type(())
 
 
@@ -187,7 +188,7 @@
                 section=section[0]
                 if type(section) is StringType: section=md[section]
                 else: section=section(md)
-                section=str(section)
+                section=ustr(section)
             else:
                 # if
                 cache={}
@@ -220,7 +221,7 @@
 
                 finally: md._pop()
 
-        elif type(section) is not StringType:
+        elif type(section) is not StringType and type(section) is not UnicodeType:
             section=section(md)
 
         if section: rendered.append(section)
@@ -228,5 +229,21 @@
     l=len(rendered)
     if l==0: return ''
     elif l==1: return rendered[0]
-    return ''.join(rendered)
-    return rendered
+    return join_unicode(rendered)
+
+def join_unicode(rendered):
+    """Join a list of plain strings into a single plain string,
+    a list of unicode strings into a single unicode string,
+    or a list containing a mix into a single unicode string with
+    the plain strings decoded as latin-1.
+    """
+    try:
+        return ''.join(rendered)
+    except UnicodeError:
+        # A mix of unicode strings and non-ascii plain strings.
+        # Fix up a copy of the list, treating plain strings as latin-1.
+        rendered = list(rendered)
+        for i in range(len(rendered)):
+            if type(rendered[i]) is StringType:
+                rendered[i] = unicode(rendered[i],'latin-1')
+        return u''.join(rendered)