[Zope3-checkins] SVN: Zope3/trunk/src/zope/app/file/browser/ Fix
for issue #302: it is now possible to edit the content of
File objects
Marius Gedminas
marius at pov.lt
Fri Jul 29 16:50:12 EDT 2005
Log message for revision 37566:
Fix for issue #302: it is now possible to edit the content of File objects
that contain non-ASCII text in a web form, provided that the content type
includes an appropriate charset declaration.
Changed:
U Zope3/trunk/src/zope/app/file/browser/configure.zcml
U Zope3/trunk/src/zope/app/file/browser/file.py
U Zope3/trunk/src/zope/app/file/browser/file.txt
U Zope3/trunk/src/zope/app/file/browser/tests/test_file.py
-=-
Modified: Zope3/trunk/src/zope/app/file/browser/configure.zcml
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/configure.zcml 2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/configure.zcml 2005-07-29 20:50:12 UTC (rev 37566)
@@ -6,19 +6,16 @@
<!-- directives for File -->
- <browser:editform
+ <browser:form
name="edit.html"
- schema="zope.app.file.interfaces.IFile"
+ for="zope.app.file.interfaces.IFile"
+ schema="zope.app.file.browser.file.IFileEditForm"
label="Change a file"
- permission="zope.ManageContent"
+ permission="zope.ManageContent"
+ class="zope.app.file.browser.file.FileEdit"
>
+ </browser:form>
- <widget
- field="data"
- class="zope.app.form.browser.BytesAreaWidget" />
-
- </browser:editform>
-
<browser:menuItem
menu="zmi_views" title="Edit"
for="zope.app.file.interfaces.IFile"
Modified: Zope3/trunk/src/zope/app/file/browser/file.py
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/file.py 2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/file.py 2005-07-29 20:50:12 UTC (rev 37566)
@@ -20,11 +20,13 @@
import zope.event
+from zope.schema import Text
from zope.app import content_types
from zope.app.event import objectevent
from zope.app.file.file import File
from zope.app.file.interfaces import IFile
from zope.app.i18n import ZopeMessageIDFactory as _
+from zope.app.exception.interfaces import UserError
__docformat__ = 'restructuredtext'
@@ -218,3 +220,177 @@
status = _("Updated on ${date_time}")
status.mapping = {'date_time': formatter.format(datetime.utcnow())}
return status
+
+
+class IFileEditForm(IFile):
+ """Schema for the File edit form.
+
+ Replaces the Bytes `data` field with a Text field.
+ """
+
+ data = Text(
+ title=_(u'Data'),
+ description=_(u'The actual content of the object.'),
+ default=u'',
+ missing_value=u'',
+ required=False,
+ )
+
+
+class UnknownCharset(Exception):
+ """Unknown character set."""
+
+class CharsetTooWeak(Exception):
+ """Character set cannot encode all characters in text."""
+
+
+class FileEdit(object):
+ r"""File edit form mixin.
+
+ Lets the user edit a text file directly via a browser form.
+
+ Converts between Unicode strings used in browser forms and 8-bit strings
+ stored internally.
+
+ >>> from zope.app.publisher.browser import BrowserView
+ >>> from zope.publisher.browser import TestRequest
+ >>> class FileEditView(FileEdit, BrowserView): pass
+ >>> view = FileEditView(File(), TestRequest())
+ >>> view.getData()
+ {'data': u'', 'contentType': ''}
+
+ >>> view.setData({'contentType': 'text/plain; charset=ISO-8859-13',
+ ... 'data': u'text \u0105'})
+ u'Updated on ${date_time}'
+
+ >>> view.context.contentType
+ 'text/plain; charset=ISO-8859-13'
+ >>> view.context.data
+ 'text \xe0'
+
+ >>> view.getData()['data']
+ u'text \u0105'
+
+ You will get an error if you try to specify a charset that cannot encode
+ all the characters:
+
+ >>> view.setData({'contentType': 'text/xml; charset=ISO-8859-1',
+ ... 'data': u'text \u0105'})
+ Traceback (most recent call last):
+ ...
+ CharsetTooWeak: ISO-8859-1
+
+ You will get a different error if you try to specify an invalid charset:
+
+ >>> view.setData({'contentType': 'text/xml; charset=UNKNOWN',
+ ... 'data': u'text \u0105'})
+ Traceback (most recent call last):
+ ...
+ UnknownCharset: UNKNOWN
+
+ The update method catches those errors and replaces them with error
+ messages:
+
+ >>> from zope.i18n import translate
+ >>> class FakeFormView(BrowserView):
+ ... def update(self):
+ ... raise CharsetTooWeak('ASCII')
+ >>> class FileEditView(FileEdit, FakeFormView): pass
+ >>> view = FileEditView(File(), TestRequest())
+ >>> translate(view.update())
+ u'The character set you specified (ASCII) cannot encode all characters in text.'
+ >>> translate(view.update_status)
+ u'The character set you specified (ASCII) cannot encode all characters in text.'
+
+ >>> class FakeFormView(BrowserView):
+ ... def update(self):
+ ... raise UnknownCharset('UNKNOWN')
+ >>> class FileEditView(FileEdit, FakeFormView): pass
+ >>> view = FileEditView(File(), TestRequest())
+ >>> translate(view.update())
+ u'The character set you specified (UNKNOWN) is not supported.'
+ >>> translate(view.update_status)
+ u'The character set you specified (UNKNOWN) is not supported.'
+
+ Speaking of errors, if you trick the system and upload a file with
+ an incorrect charset designation, you will get a UserError when you visit
+ the view:
+
+ >>> view.context.contentType = 'text/plain; charset=UNKNOWN'
+ >>> view.context.data = '\xff'
+ >>> view.getData()
+ Traceback (most recent call last):
+ ...
+ UserError: The character set specified in the content type ($charset) is not supported.
+
+ >>> view.context.contentType = 'text/plain; charset=UTF-8'
+ >>> view.context.data = '\xff'
+ >>> view.getData()
+ Traceback (most recent call last):
+ ...
+ UserError: The character set specified in the content type ($charset) does not match file content.
+
+ """
+
+ error = None
+
+ def getData(self):
+ charset = extractCharset(self.context.contentType)
+ try:
+ return {'contentType': self.context.contentType,
+ 'data': self.context.data.decode(charset)}
+ except LookupError:
+ msg = _("The character set specified in the content type"
+ " ($charset) is not supported.")
+ msg.mapping = {'charset': charset}
+ raise UserError(msg)
+ except UnicodeDecodeError:
+ msg = _("The character set specified in the content type"
+ " ($charset) does not match file content.")
+ msg.mapping = {'charset': charset}
+ raise UserError(msg)
+
+ def setData(self, data):
+ charset = extractCharset(data['contentType'])
+ try:
+ self.context.data = data['data'].encode(charset)
+ except LookupError:
+ raise UnknownCharset(charset)
+ except UnicodeEncodeError:
+ raise CharsetTooWeak(charset)
+ self.context.contentType = data['contentType']
+ formatter = self.request.locale.dates.getFormatter('dateTime',
+ 'medium')
+ status = _("Updated on ${date_time}")
+ status.mapping = {'date_time': formatter.format(datetime.utcnow())}
+ return status
+
+ def update(self):
+ try:
+ return super(FileEdit, self).update()
+ except CharsetTooWeak, charset:
+ self.update_status = _("The character set you specified ($charset)"
+ " cannot encode all characters in text.")
+ self.update_status.mapping = {'charset': charset}
+ return self.update_status
+ except UnknownCharset, charset:
+ self.update_status = _("The character set you specified ($charset)"
+ " is not supported.")
+ self.update_status.mapping = {'charset': charset}
+ return self.update_status
+
+
+def extractCharset(content_type):
+ """Extract charset information from a MIME type.
+
+ >>> extractCharset('text/plain; charset=UTF-8')
+ 'UTF-8'
+ >>> extractCharset('text/html; charset=ISO-8859-1')
+ 'ISO-8859-1'
+ >>> extractCharset('text/plain')
+ 'ASCII'
+
+ """
+ if 'charset=' not in content_type:
+ return 'ASCII'
+ return content_type.split('charset=')[1]
Modified: Zope3/trunk/src/zope/app/file/browser/file.txt
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/file.txt 2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/file.txt 2005-07-29 20:50:12 UTC (rev 37566)
@@ -71,7 +71,7 @@
</form>
...
-Binary files
+Binary Files
------------
Let us upload a binary file.
@@ -83,23 +83,23 @@
...
... -----------------------------73793505419963331401738523176
... Content-Disposition: form-data; name="field.contentType"
- ...
+ ...
... application/octet-stream
... -----------------------------73793505419963331401738523176
... Content-Disposition: form-data; name="field.data"; filename="hello.txt.gz"
... Content-Type: application/x-gzip
- ...
+ ...
... \x1f\x8b\x08\x08\xcb\x48\xea\x42\x00\x03\x68\x65\x6c\x6c\x6f\x2e\
... \x74\x78\x74\x00\xcb\x48\xcd\xc9\xc9\xe7\x02\x00\x20\x30\x3a\x36\
... \x06\x00\x00\x00
... -----------------------------73793505419963331401738523176
... Content-Disposition: form-data; name="UPDATE_SUBMIT"
- ...
+ ...
... Add
... -----------------------------73793505419963331401738523176
... Content-Disposition: form-data; name="add_input_name"
- ...
...
+ ...
... -----------------------------73793505419963331401738523176--
... """)
HTTP/1.1 303 See Other
@@ -123,12 +123,12 @@
...
Let's make sure the (binary) content of the file is correct
-
+
>>> response.getBody().encode('base64')
'H4sICMtI6kIAA2hlbGxvLnR4dADLSM3JyecCACAwOjYGAAAA\n'
-Text files
+Text Files
----------
Let us now create a text file.
@@ -137,23 +137,23 @@
... POST /+/zope.app.file.File%3D HTTP/1.1
... Authorization: Basic mgr:mgrpw
... Content-Type: multipart/form-data; boundary=---------------------------167769037320366690221542301033
- ...
+ ...
... -----------------------------167769037320366690221542301033
... Content-Disposition: form-data; name="field.contentType"
- ...
+ ...
... text/plain
... -----------------------------167769037320366690221542301033
... Content-Disposition: form-data; name="field.data"; filename=""
... Content-Type: application/octet-stream
- ...
- ...
+ ...
+ ...
... -----------------------------167769037320366690221542301033
... Content-Disposition: form-data; name="UPDATE_SUBMIT"
- ...
+ ...
... Add
... -----------------------------167769037320366690221542301033
... Content-Disposition: form-data; name="add_input_name"
- ...
+ ...
... sample.txt
... -----------------------------167769037320366690221542301033--
... """)
@@ -179,7 +179,7 @@
>>> print http(r"""
... GET /sample.txt/edit.html HTTP/1.1
... Authorization: Basic mgr:mgrpw
- ... """)
+ ... """, handle_errors=False)
HTTP/1.1 200 Ok
Content-Length: ...
Content-Type: text/html;charset=utf-8
@@ -212,23 +212,23 @@
... Authorization: Basic mgr:mgrpw
... Content-Length: ...
... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
- ...
+ ...
... -----------------------------165727764114325486311042046845
... Content-Disposition: form-data; name="field.contentType"
- ...
+ ...
... text/plain
... -----------------------------165727764114325486311042046845
... Content-Disposition: form-data; name="field.data"
- ...
+ ...
... This is a sample text file.
- ...
+ ...
... It can only contain US-ASCII characters.
... -----------------------------165727764114325486311042046845
... Content-Disposition: form-data; name="UPDATE_SUBMIT"
- ...
+ ...
... Change
... -----------------------------165727764114325486311042046845--
- ... """)
+ ... """, handle_errors=False)
HTTP/1.1 200 Ok
Content-Length: ...
Content-Type: text/html;charset=utf-8
@@ -274,3 +274,279 @@
<BLANKLINE>
It can only contain US-ASCII characters.
+
+Non-ASCII Text Files
+--------------------
+
+If we want an uploaded text file to contain non-ASCII characters, we have to
+explicitly specify the charset.
+
+ >>> print http("""
+ ... POST /sample.txt/edit.html HTTP/1.1
+ ... Authorization: Basic mgr:mgrpw
+ ... Content-Length: ...
+ ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+ ...
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.contentType"
+ ...
+ ... text/plain; charset=UTF-8
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.data"
+ ...
+ ... This is a sample text file.
+ ...
+ ... It can now contain UTF-8 characters, e.g. \xe2\x98\xbb (U+263B BLACK SMILING FACE).
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+ ...
+ ... Change
+ ... -----------------------------165727764114325486311042046845--
+ ... """)
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/html;charset=utf-8
+ <BLANKLINE>
+ ...
+ <title>Z3: sample.txt</title>
+ ...
+ <form action="http://localhost/sample.txt/edit.html"
+ method="post" enctype="multipart/form-data">
+ <div>
+ <h3>Change a file</h3>
+ <BLANKLINE>
+ <p>Updated on ...</p>
+ <BLANKLINE>
+ <div class="row">
+ ...<input class="textType" id="field.contentType" name="field.contentType"
+ size="20" type="text" value="text/plain; charset=UTF-8" />...
+ <div class="row">
+ ...<textarea cols="60" id="field.data" name="field.data" rows="15"
+ >This is a sample text file.
+ <BLANKLINE>
+ It can now contain UTF-8 characters, e.g. ... (U+263B BLACK SMILING FACE).</textarea></div>
+ ...
+ <div class="controls">
+ <input type="submit" value="Refresh" />
+ <input type="submit" name="UPDATE_SUBMIT"
+ value="Change" />
+ </div>
+ ...
+ </form>
+ ...
+
+Here's the file
+
+ >>> response = http(r"""
+ ... GET /sample.txt HTTP/1.1
+ ... """)
+ >>> print response
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/plain; charset=UTF-8
+ <BLANKLINE>
+ This is a sample text file.
+ <BLANKLINE>
+ It can now contain UTF-8 characters, e.g. ... (U+263B BLACK SMILING FACE).
+
+ >>> u'\u263B' in response.getBody().decode('UTF-8')
+ True
+
+You can use other charsets too. Note that the browser form is always UTF-8.
+
+ >>> print http("""
+ ... POST /sample.txt/edit.html HTTP/1.1
+ ... Authorization: Basic mgr:mgrpw
+ ... Content-Length: ...
+ ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+ ...
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.contentType"
+ ...
+ ... text/plain; charset=ISO-8859-1
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.data"
+ ...
+ ... This is a sample text file.
+ ...
+ ... It now contains Latin-1 characters, e.g. \xc2\xa7 (U+00A7 SECTION SIGN).
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+ ...
+ ... Change
+ ... -----------------------------165727764114325486311042046845--
+ ... """)
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/html;charset=utf-8
+ <BLANKLINE>
+ ...
+ <title>Z3: sample.txt</title>
+ ...
+ <form action="http://localhost/sample.txt/edit.html"
+ method="post" enctype="multipart/form-data">
+ <div>
+ <h3>Change a file</h3>
+ <BLANKLINE>
+ <p>Updated on ...</p>
+ <BLANKLINE>
+ <div class="row">
+ ...<input class="textType" id="field.contentType" name="field.contentType"
+ size="20" type="text" value="text/plain; charset=ISO-8859-1" />...
+ <div class="row">
+ ...<textarea cols="60" id="field.data" name="field.data" rows="15"
+ >This is a sample text file.
+ <BLANKLINE>
+ It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).</textarea></div>
+ ...
+ <div class="controls">
+ <input type="submit" value="Refresh" />
+ <input type="submit" name="UPDATE_SUBMIT"
+ value="Change" />
+ </div>
+ ...
+ </form>
+ ...
+
+Here's the file
+
+ >>> response = http(r"""
+ ... GET /sample.txt HTTP/1.1
+ ... """)
+ >>> print response
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/plain; charset=ISO-8859-1
+ <BLANKLINE>
+ This is a sample text file.
+ <BLANKLINE>
+ It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).
+
+The body is actually encoded in ISO-8859-1, not UTF-8:
+
+ >>> response.getBody().splitlines()[-1]
+ 'It now contains Latin-1 characters, e.g. \xa7 (U+00A7 SECTION SIGN).'
+
+The user is not allowed to specify a character set that cannot represent all
+the characters.
+
+ >>> print http("""
+ ... POST /sample.txt/edit.html HTTP/1.1
+ ... Authorization: Basic mgr:mgrpw
+ ... Content-Length: ...
+ ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+ ...
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.contentType"
+ ...
+ ... text/plain
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.data"
+ ...
+ ... This is a slightly changed sample text file.
+ ...
+ ... It now contains Latin-1 characters, e.g. \xc2\xa7 (U+00A7 SECTION SIGN).
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+ ...
+ ... Change
+ ... -----------------------------165727764114325486311042046845--
+ ... """, handle_errors=False)
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/html;charset=utf-8
+ <BLANKLINE>
+ ...
+ <title>Z3: sample.txt</title>
+ ...
+ <form action="http://localhost/sample.txt/edit.html"
+ method="post" enctype="multipart/form-data">
+ <div>
+ <h3>Change a file</h3>
+ <BLANKLINE>
+ <p>The character set you specified (ASCII) cannot encode all characters in text.</p>
+ <BLANKLINE>
+ <div class="row">
+ ...<input class="textType" id="field.contentType" name="field.contentType" size="20" type="text" value="text/plain" />...
+ <div class="row">
+ ...<textarea cols="60" id="field.data" name="field.data" rows="15" >This is a slightly changed sample text file.
+ <BLANKLINE>
+ It now contains Latin-1 characters, e.g. ... (U+00A7 SECTION SIGN).</textarea></div>
+ ...
+ <div class="controls">
+ <input type="submit" value="Refresh" />
+ <input type="submit" name="UPDATE_SUBMIT"
+ value="Change" />
+ </div>
+ ...
+ </form>
+ ...
+
+Likewise, the user is not allowed to specify a character set that is not supported by Python.
+
+ >>> print http("""
+ ... POST /sample.txt/edit.html HTTP/1.1
+ ... Authorization: Basic mgr:mgrpw
+ ... Content-Length: ...
+ ... Content-Type: multipart/form-data; boundary=---------------------------165727764114325486311042046845
+ ...
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.contentType"
+ ...
+ ... text/plain; charset=I-INVENT-MY-OWN
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="field.data"
+ ...
+ ... This is a slightly changed sample text file.
+ ...
+ ... It now contains just ASCII characters.
+ ... -----------------------------165727764114325486311042046845
+ ... Content-Disposition: form-data; name="UPDATE_SUBMIT"
+ ...
+ ... Change
+ ... -----------------------------165727764114325486311042046845--
+ ... """, handle_errors=False)
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/html;charset=utf-8
+ <BLANKLINE>
+ ...
+ <title>Z3: sample.txt</title>
+ ...
+ <form action="http://localhost/sample.txt/edit.html"
+ method="post" enctype="multipart/form-data">
+ <div>
+ <h3>Change a file</h3>
+ <BLANKLINE>
+ <p>The character set you specified (I-INVENT-MY-OWN) is not supported.</p>
+ <BLANKLINE>
+ <div class="row">
+ ...<input class="textType" id="field.contentType" name="field.contentType" size="20" type="text" value="text/plain; charset=I-INVENT-MY-OWN" />...
+ <div class="row">
+ ...<textarea cols="60" id="field.data" name="field.data" rows="15" >This is a slightly changed sample text file.
+ <BLANKLINE>
+ It now contains just ASCII characters.</textarea></div>
+ ...
+ <div class="controls">
+ <input type="submit" value="Refresh" />
+ <input type="submit" name="UPDATE_SUBMIT"
+ value="Change" />
+ </div>
+ ...
+ </form>
+ ...
+
+If you trick Zope and upload a file with a content type that does not match the
+file contents, you will not be able to access the edit view.
+
+ >>> print http(r"""
+ ... GET /hello.txt.gz/@@edit.html HTTP/1.1
+ ... Authorization: Basic mgr:mgrpw
+ ... """)
+ HTTP/1.1 200 Ok
+ Content-Length: ...
+ Content-Type: text/html;charset=utf-8
+ <BLANKLINE>
+ ...
+ <li>The character set specified in the content type (ASCII) does not match file content.</li>
+ ...
Modified: Zope3/trunk/src/zope/app/file/browser/tests/test_file.py
===================================================================
--- Zope3/trunk/src/zope/app/file/browser/tests/test_file.py 2005-07-29 20:46:23 UTC (rev 37565)
+++ Zope3/trunk/src/zope/app/file/browser/tests/test_file.py 2005-07-29 20:50:12 UTC (rev 37566)
@@ -18,10 +18,13 @@
import unittest
from zope.testing import doctest
+from zope.app.testing import placelesssetup
def test_suite():
- return doctest.DocTestSuite("zope.app.file.browser.file")
+ return doctest.DocTestSuite("zope.app.file.browser.file",
+ setUp=placelesssetup.setUp,
+ tearDown=placelesssetup.tearDown)
if __name__ == "__main__":
unittest.main(defaultTest="test_suite")
More information about the Zope3-Checkins
mailing list