[Zope3-checkins] SVN: Zope3/trunk/src/zope/app/file/browser/ Fix for issue #302: it is now possible to edit the content of File objects

Marius Gedminas marius at pov.lt
Fri Jul 29 16:50:12 EDT 2005


Log message for revision 37566:
  Fix for issue #302: it is now possible to edit the content of File objects
  that contain non-ASCII text in a web form, provided that the content type
  includes an appropriate charset declaration.
  
  

Changed:
  U   Zope3/trunk/src/zope/app/file/browser/configure.zcml
  U   Zope3/trunk/src/zope/app/file/browser/file.py
  U   Zope3/trunk/src/zope/app/file/browser/file.txt
  U   Zope3/trunk/src/zope/app/file/browser/tests/test_file.py

-=-
Modified: Zope3/trunk/src/zope/app/file/browser/configure.zcml
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/configure.zcml	2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/configure.zcml	2005-07-29 20:50:12 UTC (rev 37566)
@@ -6,19 +6,16 @@
 
   <!-- directives for File -->
   
-  <browser:editform
+  <browser:form
       name="edit.html"
-      schema="zope.app.file.interfaces.IFile"
+      for="zope.app.file.interfaces.IFile"
+      schema="zope.app.file.browser.file.IFileEditForm"
       label="Change a file"
-      permission="zope.ManageContent" 
+      permission="zope.ManageContent"
+      class="zope.app.file.browser.file.FileEdit"
       >
+  </browser:form>
 
-    <widget
-        field="data"
-        class="zope.app.form.browser.BytesAreaWidget" />
-
-  </browser:editform>
-
   <browser:menuItem
       menu="zmi_views" title="Edit"
       for="zope.app.file.interfaces.IFile"

Modified: Zope3/trunk/src/zope/app/file/browser/file.py
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/file.py	2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/file.py	2005-07-29 20:50:12 UTC (rev 37566)
@@ -20,11 +20,13 @@
 
 import zope.event
 
+from zope.schema import Text
 from zope.app import content_types
 from zope.app.event import objectevent
 from zope.app.file.file import File
 from zope.app.file.interfaces import IFile
 from zope.app.i18n import ZopeMessageIDFactory as _
+from zope.app.exception.interfaces import UserError
 
 __docformat__ = 'restructuredtext'
 
@@ -218,3 +220,177 @@
         status = _("Updated on ${date_time}")
         status.mapping = {'date_time': formatter.format(datetime.utcnow())}
         return status
+
+
+class IFileEditForm(IFile):
+    """Schema for the File edit form.
+
+    Replaces the Bytes `data` field with a Text field.
+    """
+
+    data = Text(
+        title=_(u'Data'),
+        description=_(u'The actual content of the object.'),
+        default=u'',
+        missing_value=u'',
+        required=False,
+        )
+
+
+class UnknownCharset(Exception):
+    """Unknown character set."""
+
+class CharsetTooWeak(Exception):
+    """Character set cannot encode all characters in text."""
+
+
+class FileEdit(object):
+    r"""File edit form mixin.
+
+    Lets the user edit a text file directly via a browser form.
+
+    Converts between Unicode strings used in browser forms and 8-bit strings
+    stored internally.
+
+        >>> from zope.app.publisher.browser import BrowserView
+        >>> from zope.publisher.browser import TestRequest
+        >>> class FileEditView(FileEdit, BrowserView): pass
+        >>> view = FileEditView(File(), TestRequest())
+        >>> view.getData()
+        {'data': u'', 'contentType': ''}
+
+        >>> view.setData({'contentType': 'text/plain; charset=ISO-8859-13',
+        ...               'data': u'text \u0105'})
+        u'Updated on ${date_time}'
+
+        >>> view.context.contentType
+        'text/plain; charset=ISO-8859-13'
+        >>> view.context.data
+        'text \xe0'
+
+        >>> view.getData()['data']
+        u'text \u0105'
+
+    You will get an error if you try to specify a charset that cannot encode
+    all the characters:
+
+        >>> view.setData({'contentType': 'text/xml; charset=ISO-8859-1',
+        ...               'data': u'text \u0105'})
+        Traceback (most recent call last):
+          ...
+        CharsetTooWeak: ISO-8859-1
+
+    You will get a different error if you try to specify an invalid charset:
+
+        >>> view.setData({'contentType': 'text/xml; charset=UNKNOWN',
+        ...               'data': u'text \u0105'})
+        Traceback (most recent call last):
+          ...
+        UnknownCharset: UNKNOWN
+
+    The update method catches those errors and replaces them with error
+    messages:
+
+        >>> from zope.i18n import translate
+        >>> class FakeFormView(BrowserView):
+        ...     def update(self):
+        ...         raise CharsetTooWeak('ASCII')
+        >>> class FileEditView(FileEdit, FakeFormView): pass
+        >>> view = FileEditView(File(), TestRequest())
+        >>> translate(view.update())
+        u'The character set you specified (ASCII) cannot encode all characters in text.'
+        >>> translate(view.update_status)
+        u'The character set you specified (ASCII) cannot encode all characters in text.'
+
+        >>> class FakeFormView(BrowserView):
+        ...     def update(self):
+        ...         raise UnknownCharset('UNKNOWN')
+        >>> class FileEditView(FileEdit, FakeFormView): pass
+        >>> view = FileEditView(File(), TestRequest())
+        >>> translate(view.update())
+        u'The character set you specified (UNKNOWN) is not supported.'
+        >>> translate(view.update_status)
+        u'The character set you specified (UNKNOWN) is not supported.'
+
+    Speaking of errors, if you trick the system and upload a file with an
+    incorrect charset designation, you will get a UserError when you visit the
+    view:
+
+        >>> view.context.contentType = 'text/plain; charset=UNKNOWN'
+        >>> view.context.data = '\xff'
+        >>> view.getData()
+        Traceback (most recent call last):
+          ...
+        UserError: The character set specified in the content type ($charset) is not supported.
+
+        >>> view.context.contentType = 'text/plain; charset=UTF-8'
+        >>> view.context.data = '\xff'
+        >>> view.getData()
+        Traceback (most recent call last):
+          ...
+        UserError: The character set specified in the content type ($charset) does not match file content.
+
+    """
+
+    error = None
+
+    def getData(self):
+        charset = extractCharset(self.context.contentType)
+        try:
+            return {'contentType': self.context.contentType,
+                    'data': self.context.data.decode(charset)}
+        except LookupError:
+            msg = _("The character set specified in the content type"
+                    " ($charset) is not supported.")
+            msg.mapping = {'charset': charset}
+            raise UserError(msg)
+        except UnicodeDecodeError:
+            msg = _("The character set specified in the content type"
+                    " ($charset) does not match file content.")
+            msg.mapping = {'charset': charset}
+            raise UserError(msg)
+
+    def setData(self, data):
+        charset = extractCharset(data['contentType'])
+        try:
+            self.context.data = data['data'].encode(charset)
+        except LookupError:
+            raise UnknownCharset(charset)
+        except UnicodeEncodeError:
+            raise CharsetTooWeak(charset)
+        self.context.contentType = data['contentType']
+        formatter = self.request.locale.dates.getFormatter('dateTime',
+                                                           'medium')
+        status = _("Updated on ${date_time}")
+        status.mapping = {'date_time': formatter.format(datetime.utcnow())}
+        return status
+
+    def update(self):
+        try:
+            return super(FileEdit, self).update()
+        except CharsetTooWeak, charset:
+            self.update_status = _("The character set you specified ($charset)"
+                                   " cannot encode all characters in text.")
+            self.update_status.mapping = {'charset': charset}
+            return self.update_status
+        except UnknownCharset, charset:
+            self.update_status = _("The character set you specified ($charset)"
+                                   " is not supported.")
+            self.update_status.mapping = {'charset': charset}
+            return self.update_status
+
+
+def extractCharset(content_type):
+    """Extract charset information from a MIME type.
+
+        >>> extractCharset('text/plain; charset=UTF-8')
+        'UTF-8'
+        >>> extractCharset('text/html; charset=ISO-8859-1')
+        'ISO-8859-1'
+        >>> extractCharset('text/plain')
+        'ASCII'
+
+    """
+    if 'charset=' not in content_type:
+        return 'ASCII'
+    return content_type.split('charset=')[1]

Modified: Zope3/trunk/src/zope/app/file/browser/file.txt
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/file.txt	2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/file.txt	2005-07-29 20:50:12 UTC (rev 37566)
@@ -71,7 +71,7 @@
     </form>
   ...
 
-Binary files
+Binary Files
 ------------
 
 Let us upload a binary file.
@@ -83,23 +83,23 @@
   ...
   ... -----------------------------73793505419963331401738523176
   ... Content-Disposition: form-data; name="field.contentType"
-  ... 
+  ...
   ... application/octet-stream
   ... -----------------------------73793505419963331401738523176
   ... Content-Disposition: form-data; name="field.data"; filename="hello.txt.gz"
   ... Content-Type: application/x-gzip
-  ... 
+  ...
   ... \x1f\x8b\x08\x08\xcb\x48\xea\x42\x00\x03\x68\x65\x6c\x6c\x6f\x2e\
   ... \x74\x78\x74\x00\xcb\x48\xcd\xc9\xc9\xe7\x02\x00\x20\x30\x3a\x36\
   ... \x06\x00\x00\x00
   ... -----------------------------73793505419963331401738523176
   ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
-  ... 
+  ...
   ... Add
   ... -----------------------------73793505419963331401738523176
   ... Content-Disposition: form-data; name="add_input_name"
-  ... 
   ...
+  ...
   ... -----------------------------73793505419963331401738523176--
   ... """)
   HTTP/1.1 303 See Other
@@ -123,12 +123,12 @@
   ...
 
 Let's make sure the (binary) content of the file is correct
-  
+
   >>> response.getBody().encode('base64')
   'H4sICMtI6kIAA2hlbGxvLnR4dADLSM3JyecCACAwOjYGAAAA\n'
 
 
-Text files
+Text Files
 ----------
 
 Let us now create a text file.
@@ -137,23 +137,23 @@
   ... POST /+/zope.app.file.File%3D HTTP/1.1
   ... Authorization: Basic mgr:mgrpw
   ... Content-Type: multipart/form-data; boundary=---------------------------167769037320366690221542301033
-  ... 
+  ...
   ... -----------------------------167769037320366690221542301033
   ... Content-Disposition: form-data; name="field.contentType"
-  ... 
+  ...
   ... text/plain
   ... -----------------------------167769037320366690221542301033
   ... Content-Disposition: form-data; name="field.data"; filename=""
   ... Content-Type: application/octet-stream
-  ... 
-  ... 
+  ...
+  ...
   ... -----------------------------167769037320366690221542301033
   ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
-  ... 
+  ...
   ... Add
   ... -----------------------------167769037320366690221542301033
   ... Content-Disposition: form-data; name="add_input_name"
-  ... 
+  ...
   ... sample.txt
   ... -----------------------------167769037320366690221542301033--
   ... """)
@@ -179,7 +179,7 @@
   >>> print http(r"""
   ... GET /sample.txt/edit.html HTTP/1.1
   ... Authorization: Basic mgr:mgrpw
-  ... """)
+  ... """, handle_errors=False)
   HTTP/1.1 200 Ok
   Content-Length: ...
   Content-Type: text/html;charset=utf-8
@@ -212,23 +212,23 @@
   ... Authorization: Basic mgr:mgrpw
   ... Content-Length: ...
   ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
-  ... 
+  ...
   ... -----------------------------165727764114325486311042046845
   ... Content-Disposition: form-data; name="field.contentType"
-  ... 
+  ...
   ... text/plain
   ... -----------------------------165727764114325486311042046845
   ... Content-Disposition: form-data; name="field.data"
-  ... 
+  ...
   ... This is a sample text file.
-  ... 
+  ...
   ... It can only contain US-ASCII characters.
   ... -----------------------------165727764114325486311042046845
   ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
-  ... 
+  ...
   ... Change
   ... -----------------------------165727764114325486311042046845--
-  ... """)
+  ... """, handle_errors=False)
   HTTP/1.1 200 Ok
   Content-Length: ...
   Content-Type: text/html;charset=utf-8
@@ -274,3 +274,279 @@
   <BLANKLINE>
   It can only contain US-ASCII characters.
 
+
+Non-ASCII Text Files
+--------------------
+
+If we want an uploaded text file to contain non-ASCII characters, we have to
+explicitly specify the charset.
+
+  >>> print http("""
+  ... POST /sample.txt/edit.html HTTP/1.1
+  ... Authorization: Basic mgr:mgrpw
+  ... Content-Length: ...
+  ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+  ...
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.contentType"
+  ...
+  ... text/plain; charset=UTF-8
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.data"
+  ...
+  ... This is a sample text file.
+  ...
+  ... It can now contain UTF-8 characters, e.g. \xe2\x98\xbb (U+263B BLACK SMILING FACE).
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+  ...
+  ... Change
+  ... -----------------------------165727764114325486311042046845--
+  ... """)
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/html;charset=utf-8
+  <BLANKLINE>
+  ...
+      <title>Z3: sample.txt</title>
+  ...
+      <form action="http://localhost/sample.txt/edit.html"
+            method="post" enctype="multipart/form-data">
+        <div>
+          <h3>Change a file</h3>
+  <BLANKLINE>
+          <p>Updated on ...</p>
+  <BLANKLINE>
+        <div class="row">
+  ...<input class="textType" id="field.contentType" name="field.contentType"
+            size="20" type="text" value="text/plain; charset=UTF-8"  />...
+        <div class="row">
+  ...<textarea cols="60" id="field.data" name="field.data" rows="15"
+  >This is a sample text file.
+  <BLANKLINE>
+  It can now contain UTF-8 characters, e.g. ... (U+263B BLACK SMILING FACE).</textarea></div>
+  ...
+          <div class="controls">
+            <input type="submit" value="Refresh" />
+            <input type="submit" name="UPDATE_SUBMIT"
+                   value="Change" />
+          </div>
+  ...
+      </form>
+  ...
+
+Here's the file
+
+  >>> response = http(r"""
+  ... GET /sample.txt HTTP/1.1
+  ... """)
+  >>> print response
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/plain; charset=UTF-8
+  <BLANKLINE>
+  This is a sample text file.
+  <BLANKLINE>
+  It can now contain UTF-8 characters, e.g. ... (U+263B BLACK SMILING FACE).
+
+  >>> u'\u263B' in response.getBody().decode('UTF-8')
+  True
+
+You can use other charsets too.  Note that the browser form is always UTF-8.
+
+  >>> print http("""
+  ... POST /sample.txt/edit.html HTTP/1.1
+  ... Authorization: Basic mgr:mgrpw
+  ... Content-Length: ...
+  ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+  ...
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.contentType"
+  ...
+  ... text/plain; charset=ISO-8859-1
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.data"
+  ...
+  ... This is a sample text file.
+  ...
+  ... It now contains Latin-1 characters, e.g. \xc2\xa7 (U+00A7 SECTION SIGN).
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+  ...
+  ... Change
+  ... -----------------------------165727764114325486311042046845--
+  ... """)
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/html;charset=utf-8
+  <BLANKLINE>
+  ...
+      <title>Z3: sample.txt</title>
+  ...
+      <form action="http://localhost/sample.txt/edit.html"
+            method="post" enctype="multipart/form-data">
+        <div>
+          <h3>Change a file</h3>
+  <BLANKLINE>
+          <p>Updated on ...</p>
+  <BLANKLINE>
+        <div class="row">
+  ...<input class="textType" id="field.contentType" name="field.contentType"
+            size="20" type="text" value="text/plain; charset=ISO-8859-1"  />...
+        <div class="row">
+  ...<textarea cols="60" id="field.data" name="field.data" rows="15"
+  >This is a sample text file.
+  <BLANKLINE>
+  It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).</textarea></div>
+  ...
+          <div class="controls">
+            <input type="submit" value="Refresh" />
+            <input type="submit" name="UPDATE_SUBMIT"
+                   value="Change" />
+          </div>
+  ...
+      </form>
+  ...
+
+Here's the file
+
+  >>> response = http(r"""
+  ... GET /sample.txt HTTP/1.1
+  ... """)
+  >>> print response
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/plain; charset=ISO-8859-1
+  <BLANKLINE>
+  This is a sample text file.
+  <BLANKLINE>
+  It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).
+
+The body is actually encoded in ISO-8859-1, and not UTF-8:
+
+  >>> response.getBody().splitlines()[-1]
+  'It now contains Latin-1 characters, e.g. \xa7 (U+00A7 SECTION SIGN).'
+
+The user is not allowed to specify a character set that cannot represent all
+the characters.
+
+  >>> print http("""
+  ... POST /sample.txt/edit.html HTTP/1.1
+  ... Authorization: Basic mgr:mgrpw
+  ... Content-Length: ...
+  ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+  ...
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.contentType"
+  ...
+  ... text/plain
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.data"
+  ...
+  ... This is a slightly changed sample text file.
+  ...
+  ... It now contains Latin-1 characters, e.g. \xc2\xa7 (U+00A7 SECTION SIGN).
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+  ...
+  ... Change
+  ... -----------------------------165727764114325486311042046845--
+  ... """, handle_errors=False)
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/html;charset=utf-8
+  <BLANKLINE>
+  ...
+      <title>Z3: sample.txt</title>
+  ...
+      <form action="http://localhost/sample.txt/edit.html"
+            method="post" enctype="multipart/form-data">
+        <div>
+          <h3>Change a file</h3>
+  <BLANKLINE>
+          <p>The character set you specified (ASCII) cannot encode all characters in text.</p>
+  <BLANKLINE>
+        <div class="row">
+  ...<input class="textType" id="field.contentType" name="field.contentType" size="20" type="text" value="text/plain"  />...
+        <div class="row">
+  ...<textarea cols="60" id="field.data" name="field.data" rows="15" >This is a slightly changed sample text file.
+  <BLANKLINE>
+  It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).</textarea></div>
+  ...
+          <div class="controls">
+            <input type="submit" value="Refresh" />
+            <input type="submit" name="UPDATE_SUBMIT"
+                   value="Change" />
+          </div>
+  ...
+      </form>
+  ...
+
+Likewise, the user is not allowed to specify a character set that is not supported by Python.
+
+  >>> print http("""
+  ... POST /sample.txt/edit.html HTTP/1.1
+  ... Authorization: Basic mgr:mgrpw
+  ... Content-Length: ...
+  ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+  ...
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.contentType"
+  ...
+  ... text/plain; charset=I-INVENT-MY-OWN
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="field.data"
+  ...
+  ... This is a slightly changed sample text file.
+  ...
+  ... It now contains just ASCII characters.
+  ... -----------------------------165727764114325486311042046845
+  ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+  ...
+  ... Change
+  ... -----------------------------165727764114325486311042046845--
+  ... """, handle_errors=False)
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/html;charset=utf-8
+  <BLANKLINE>
+  ...
+      <title>Z3: sample.txt</title>
+  ...
+      <form action="http://localhost/sample.txt/edit.html"
+            method="post" enctype="multipart/form-data">
+        <div>
+          <h3>Change a file</h3>
+  <BLANKLINE>
+          <p>The character set you specified (I-INVENT-MY-OWN) is not supported.</p>
+  <BLANKLINE>
+        <div class="row">
+  ...<input class="textType" id="field.contentType" name="field.contentType" size="20" type="text" value="text/plain; charset=I-INVENT-MY-OWN"  />...
+        <div class="row">
+  ...<textarea cols="60" id="field.data" name="field.data" rows="15" >This is a slightly changed sample text file.
+  <BLANKLINE>
+  It now contains just ASCII characters.</textarea></div>
+  ...
+          <div class="controls">
+            <input type="submit" value="Refresh" />
+            <input type="submit" name="UPDATE_SUBMIT"
+                   value="Change" />
+          </div>
+  ...
+      </form>
+  ...
+
+If you trick Zope and upload a file with a content type that does not match the
+file contents, you will not be able to access the edit view.
+
+  >>> print http(r"""
+  ... GET /hello.txt.gz/@@edit.html HTTP/1.1
+  ... Authorization: Basic mgr:mgrpw
+  ... """)
+  HTTP/1.1 200 Ok
+  Content-Length: ...
+  Content-Type: text/html;charset=utf-8
+  <BLANKLINE>
+  ...
+     <li>The character set specified in the content type (ASCII) does not match file content.</li>
+  ...

Modified: Zope3/trunk/src/zope/app/file/browser/tests/test_file.py
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/tests/test_file.py	2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/tests/test_file.py	2005-07-29 20:50:12 UTC (rev 37566)
@@ -18,10 +18,13 @@
 import unittest
 
 from zope.testing import doctest
+from zope.app.testing import placelesssetup
 
 
 def test_suite():
-    return doctest.DocTestSuite("zope.app.file.browser.file")
+    return doctest.DocTestSuite("zope.app.file.browser.file",
+                                setUp=placelesssetup.setUp,
+                                tearDown=placelesssetup.tearDown)
 
 if __name__ == "__main__":
     unittest.main(defaultTest="test_suite")



More information about the Zope3-Checkins mailing list