[Zope-Checkins] CVS: Zope/lib/python/ZPublisher - TaintedString.py:1.1 Converters.py:1.17 HTTPRequest.py:1.75

Martijn Pieters mj@zope.com
Thu, 1 Aug 2002 12:00:42 -0400


Update of /cvs-repository/Zope/lib/python/ZPublisher
In directory cvs.zope.org:/tmp/cvs-serv9325/lib/python/ZPublisher

Modified Files:
	Converters.py HTTPRequest.py 
Added Files:
	TaintedString.py 
Log Message:
Big change

- Make DTML automatically html quote data indirectly taken from REQUEST
  which contain a '<'. Make sure (almost) all string operations preserve the
  taint on this data.

- Fix exceptions that use REQUEST data; quote the data.

- Don't let form and cookie values mask the REQUEST computed values such as
  URL0 and BASE1.


=== Added File Zope/lib/python/ZPublisher/TaintedString.py ===
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################

__version__='$Revision: 1.1 $'[11:-2]

from cgi import escape


# TaintedStrings hold potentially dangerous untrusted data; anything that could
# possibly hold HTML is considered dangerous. DTML code will use the quoted
# value of this string, and raised exceptions in Zope will use the repr()
# conversion.
class TaintedString:
    # Wrapper around an untrusted string.  The raw data is kept in _value;
    # quoted() returns the HTML-escaped form.  Most string operations are
    # re-implemented below so that their results stay wrapped.

    def __init__(self, value):
        # value: the raw, untrusted string to wrap.
        self._value = value

    def _retaint(self, v):
        # Re-wrap v when it still contains a '<'; otherwise hand it back
        # unchanged as a plain string.
        if '<' in v:
            return self.__class__(v)
        return v

    def __str__(self):
        # Explicit string conversion returns the raw, unquoted value.
        return self._value

    def __repr__(self):
        # repr() is built from the quoted value, never from the raw one.
        return repr(self.quoted())

    def __cmp__(self, o):
        # Compare by the wrapped value.
        return cmp(self._value, o)

    def __hash__(self):
        # Hash like the wrapped value does.
        return hash(self._value)

    def __len__(self):
        return len(self._value)

    def __getitem__(self, index):
        # The result is only re-wrapped when it still contains a '<'.
        return self._retaint(self._value[index])

    def __getslice__(self, i, j):
        # Negative bounds are clamped to zero before slicing; the result is
        # only re-wrapped when it still contains a '<'.
        i = max(i, 0)
        j = max(j, 0)
        return self._retaint(self._value[i:j])

    # Concatenation, repetition and formatting always return a new tainted
    # string, whatever the result contains.
    def __add__(self, o):
        return self.__class__(self._value + o)

    def __radd__(self, o):
        return self.__class__(o + self._value)

    def __mul__(self, o):
        return self.__class__(self._value * o)

    def __rmul__(self, o):
        return self.__class__(o * self._value)

    def __mod__(self, o):
        return self.__class__(self._value % o)

    # Numeric conversions operate on the wrapped value and return plain
    # numbers.
    def __int__(self):
        return int(self._value)

    def __float__(self):
        return float(self._value)

    def __long__(self):
        return long(self._value)

    def __getstate__(self):
        # If an object tries to store a TaintedString, it obviously wasn't aware
        # that it was playing with untrusted data. Complain accordingly.
        raise SystemError("A TaintedString cannot be pickled. Code that "
            "caused this TaintedString to be stored should be more careful "
            "with untrusted data from the REQUEST.")

    def __getattr__(self, a):
        # for string methods support other than those defined below
        #
        # This hook only runs when normal attribute lookup fails.  If _value
        # itself is missing (an instance created without __init__), looking
        # it up below would re-enter this method without end, so report it
        # as an ordinary missing attribute instead.
        if a == '_value':
            raise AttributeError(a)
        return getattr(self._value, a)

    # Python 2.2 only.
    def decode(self, *args):
        return self.__class__(self._value.decode(*args))

    def encode(self, *args):
        return self.__class__(self._value.encode(*args))

    def expandtabs(self, *args):
        return self.__class__(self._value.expandtabs(*args))

    def replace(self, *args):
        # The result is only re-wrapped when it still contains a '<'.
        return self._retaint(self._value.replace(*args))

    def split(self, *args):
        # Returns a list; each piece is re-wrapped only when it still
        # contains a '<'.
        return [self._retaint(v) for v in self._value.split(*args)]

    def splitlines(self, *args):
        # Returns a list; each line is re-wrapped only when it still
        # contains a '<'.
        return [self._retaint(v) for v in self._value.splitlines(*args)]

    def translate(self, *args):
        # The result is only re-wrapped when it still contains a '<'.
        return self._retaint(self._value.translate(*args))

    def quoted(self):
        # The wrapped value passed through escape(); the second argument asks
        # escape() to quote double-quote characters as well.
        return escape(self._value, 1)

    # As called by cDocumentTemplate
    __untaint__ = quoted


def createSimpleWrapper(func):
    # Build a method that calls the string method named func on the wrapped
    # value and re-wraps the result in the instance's own class.
    #
    # func is captured through the enclosing scope (nested scopes, Python
    # 2.2+) rather than through a default argument, so a caller's extra
    # positional argument can no longer overwrite the method name.  Extra
    # arguments are passed straight to the string method, which lets optional
    # parameters such as the chars argument of strip() work.
    return lambda s, *args: s.__class__(getattr(s._value, func)(*args))

def createOneArgWrapper(func):
    # Build a method that calls the string method named func on the wrapped
    # value with the argument a and re-wraps the result in the instance's own
    # class.
    #
    # func is captured through the enclosing scope (nested scopes, Python
    # 2.2+) rather than through a default argument, so a caller's extra
    # positional argument can no longer overwrite the method name.  Any
    # further arguments are passed on to the string method, which lets
    # optional parameters such as the fill character of center() work.
    return lambda s, a, *args: s.__class__(getattr(s._value, func)(a, *args))

# String methods that are installed on TaintedString through
# createSimpleWrapper.
simpleWrappedMethods = [
    "capitalize", "lower", "lstrip", "rstrip", "strip", "swapcase", "title",
    "upper"]

# String methods that are installed on TaintedString through
# createOneArgWrapper.
oneArgWrappedMethods = ["center", "join", "ljust", "rjust"]

# Attach one generated wrapper per listed name to the class.
for f in simpleWrappedMethods:
    setattr(TaintedString, f, createSimpleWrapper(f))

for f in oneArgWrappedMethods:
    setattr(TaintedString, f, createOneArgWrapper(f))



=== Zope/lib/python/ZPublisher/Converters.py 1.16 => 1.17 ===
 
 import re
 from types import ListType, TupleType, UnicodeType
+from cgi import escape
 
 def field2string(v):
     if hasattr(v,'read'): return v.read()
@@ -53,7 +54,7 @@
         try: return int(v)
         except ValueError:
             raise ValueError, (
-                "An integer was expected in the value '%s'" % v
+                "An integer was expected in the value '%s'" % escape(v)
                 )
     raise ValueError, 'Empty entry when <strong>integer</strong> expected'
 
@@ -65,7 +66,8 @@
         try: return float(v)
         except ValueError:
             raise ValueError, (
-                "A floating-point number was expected in the value '%s'" % v
+                "A floating-point number was expected in the value '%s'" % 
+                escape(v)
                 )
     raise ValueError, (
         'Empty entry when <strong>floating-point number</strong> expected')
@@ -81,7 +83,7 @@
         try: return long(v)
         except ValueError:
             raise ValueError, (
-                "A long integer was expected in the value '%s'" % v
+                "A long integer was expected in the value '%s'" % escape(v)
                 )
     raise ValueError, 'Empty entry when <strong>integer</strong> expected'
 
@@ -100,7 +102,11 @@
 def field2date(v):
     from DateTime import DateTime
     v = field2string(v)
-    return DateTime(v)
+    try:
+        v = DateTime(v)
+    except DateTime.SyntaxError, e:
+        raise DateTime.SyntaxError, escape(e)
+    return v
 
 def field2boolean(v):
     return not not v


=== Zope/lib/python/ZPublisher/HTTPRequest.py 1.74 => 1.75 === (541/641 lines abridged)
 from HTTPResponse import HTTPResponse
 from cgi import FieldStorage, escape
 from urllib import quote, unquote, splittype, splitport
+from copy import deepcopy
 from Converters import get_converter
+from TaintedString import TaintedString
 from maybe_lock import allocate_lock
 xmlrpc=None # Placeholder for module that we'll import if we have to.
 
@@ -241,6 +243,7 @@
         self.response=response
         other=self.other={'RESPONSE': response}
         self.form={}
+        self.taintedform={}
         self.steps=[]
         self._steps=[]
         self._lazies={}
@@ -306,13 +309,22 @@
         # vars with the same name - they are more like default values
         # for names not otherwise specified in the form.
         cookies={}
+        taintedcookies={}
         k=get_env('HTTP_COOKIE','')
         if k:
             parse_cookie(k, cookies)
-            for k,item in cookies.items():
-                if not other.has_key(k):
-                    other[k]=item
+            for k, v in cookies.items():
+                istainted = 0
+                if '<' in k:
+                    k = TaintedString(k)
+                    istainted = 1
+                if '<' in v:
+                    v = TaintedString(v)
+                    istainted = 1
+                if istainted:
+                    taintedcookies[k] = v
         self.cookies=cookies
+        self.taintedcookies = taintedcookies
     
     def processInputs(
         self,
@@ -343,6 +355,7 @@
 
         form=self.form
         other=self.other
+        taintedform=self.taintedform
 
         meth=None

[-=- -=- -=- 541 lines omitted -=- -=- -=-]

+        if returnTaints: keys.update(self.taintedform)
 
         keys=keys.keys()
         keys.sort()
@@ -966,6 +1302,32 @@
                     base64.decodestring(auth.split()[-1]).split(':')
                 return name, password
 
+    def taintWrapper(self):
+        return TaintRequestWrapper(self)
+
+
+class TaintRequestWrapper:
+    def __init__(self, req):
+        self._req = req
+
+    def __getattr__(self, key):
+        if key in ('get', '__getitem__', '__getattr__', 'has_key', 'keys'):
+            return TaintMethodWrapper(getattr(self._req, key))
+        if not key in self._req.keys():
+            item = getattr(self._req, key, _marker)
+            if item is not _marker:
+                return item
+        return self._req.__getattr__(key, returnTaints=1)
+
+
+class TaintMethodWrapper:
+    def __init__(self, method):
+        self._method = method
+
+    def __call__(self, *args, **kw):
+        kw['returnTaints'] = 1
+        return self._method(*args, **kw)
+
 
 def has_codec(x):
     try:
@@ -1121,6 +1483,12 @@
         L1.sort()
         return '{%s}' % ', '.join(
             map(lambda item: "'%s': %s" % (item[0], repr(item[1])), L1))
+
+    def __cmp__(self, other):
+        return (cmp(type(self), type(other)) or
+                cmp(self.__class__, other.__class__) or
+                cmp(self.__dict__.items(), other.__dict__.items()))
+
 
 # Flags
 SEQUENCE=1