[CMF-checkins] SVN: CMF/trunk/CMFDefault/ Issue #452: Make HTML
scrubbing pluggable using a utility.
Tres Seaver
tseaver at palladion.com
Mon Apr 9 18:07:17 EDT 2007
Log message for revision 74068:
Issue #452: Make HTML scrubbing pluggable using a utility.
Changed:
U CMF/trunk/CMFDefault/interfaces/_content.py
U CMF/trunk/CMFDefault/tests/test_utils.py
U CMF/trunk/CMFDefault/utils.py
-=-
Modified: CMF/trunk/CMFDefault/interfaces/_content.py
===================================================================
--- CMF/trunk/CMFDefault/interfaces/_content.py 2007-04-09 22:05:03 UTC (rev 74067)
+++ CMF/trunk/CMFDefault/interfaces/_content.py 2007-04-09 22:07:17 UTC (rev 74068)
@@ -17,6 +17,12 @@
from zope.interface import Interface
+class IHTMLScrubber(Interface):
+ """ Utility interface for scrubbing user-supplied HTML.
+ """
+ def scrub(html):
+ """ Return 'scrubbed' HTML.
+ """
class IDocument(Interface):
Modified: CMF/trunk/CMFDefault/tests/test_utils.py
===================================================================
--- CMF/trunk/CMFDefault/tests/test_utils.py 2007-04-09 22:05:03 UTC (rev 74067)
+++ CMF/trunk/CMFDefault/tests/test_utils.py 2007-04-09 22:07:17 UTC (rev 74068)
@@ -122,7 +122,7 @@
self.assertNotEqual( preloaded[ 'Author' ], headers[ 'Author' ] )
self.assertEqual( preloaded[ 'text_format' ], headers[ 'text_format' ] )
- def test_scrubHTML(self):
+ def test_scrubHTML_no_adapter_falls_back(self):
from Products.CMFDefault.utils import scrubHTML
self.assertEqual( scrubHTML('<a href="foo.html">bar</a>'),
@@ -144,6 +144,46 @@
self.assertEqual( scrubHTML('<meta name="title" content="" /><meta>'),
'<meta name="title" content="" /><meta />' )
+ def test_scrubHTML_with_adapter(self):
+ from zope.interface import implements
+ from zope.component.testing import setUp
+ from zope.component.testing import tearDown
+ from zope.app.testing import ztapi
+ from Products.CMFDefault.interfaces import IHTMLScrubber
+ from Products.CMFDefault.utils import scrubHTML
+
+ class _Scrubber:
+ implements(IHTMLScrubber)
+ def scrub(self, html):
+ return html.upper()
+
+
+ setUp()
+ try:
+ ztapi.provideUtility(IHTMLScrubber, _Scrubber())
+ self.assertEqual( scrubHTML('<a href="foo.html">bar</a>'),
+ '<A HREF="FOO.HTML">BAR</A>' )
+ self.assertEqual( scrubHTML('<b>bar</b>'),
+ '<B>BAR</B>' )
+ self.assertEqual( scrubHTML('<base href="" /><base>'),
+ '<BASE HREF="" /><BASE>' )
+ self.assertEqual( scrubHTML('<blockquote>bar</blockquote>'),
+ '<BLOCKQUOTE>BAR</BLOCKQUOTE>' )
+ self.assertEqual( scrubHTML('<body bgcolor="#ffffff">bar</body>'),
+ '<BODY BGCOLOR="#FFFFFF">BAR</BODY>' )
+ self.assertEqual( scrubHTML('<br /><br>'),
+ '<BR /><BR>' )
+ self.assertEqual( scrubHTML('<hr /><hr>'),
+ '<HR /><HR>' )
+ self.assertEqual( scrubHTML('<img src="foo.png" /><img>'),
+ '<IMG SRC="FOO.PNG" /><IMG>' )
+ self.assertEqual( scrubHTML(
+ '<meta name="title" content="" /><meta>'),
+ '<META NAME="TITLE" CONTENT="" /><META>' )
+
+ finally:
+ tearDown()
+
def test_bodyfinder(self):
from Products.CMFDefault.utils import bodyfinder
Modified: CMF/trunk/CMFDefault/utils.py
===================================================================
--- CMF/trunk/CMFDefault/utils.py 2007-04-09 22:05:03 UTC (rev 74067)
+++ CMF/trunk/CMFDefault/utils.py 2007-04-09 22:07:17 UTC (rev 74068)
@@ -31,13 +31,15 @@
from zope import i18n
from zope.component import getUtility
+from zope.component import queryUtility
from zope.i18n.interfaces import IUserPreferredCharsets
from zope.i18nmessageid import MessageFactory
from Products.CMFCore.interfaces import IPropertiesTool
-from exceptions import EmailAddressInvalid
-from exceptions import IllegalHTML
+from Products.CMFDefault.interfaces import IHTMLScrubber
+from Products.CMFDefault.exceptions import EmailAddressInvalid
+from Products.CMFDefault.exceptions import IllegalHTML
security = ModuleSecurityInfo( 'Products.CMFDefault.utils' )
@@ -294,10 +296,12 @@
from htmlentitydefs import entitydefs # replace entitydefs from sgmllib
- def __init__( self ):
+ def __init__( self, valid_tags=None, nasty_tags=None ):
SGMLParser.__init__( self )
self.result = ""
+ self.valid_tags = valid_tags or VALID_TAGS
+ self.nasty_tags = nasty_tags or NASTY_TAGS
def handle_data( self, data ):
@@ -321,7 +325,7 @@
def unknown_starttag(self, tag, attrs):
""" Delete all tags except for legal ones.
"""
- if VALID_TAGS.has_key(tag):
+ if self.valid_tags.has_key(tag):
self.result = self.result + '<' + tag
@@ -340,12 +344,12 @@
self.result = '%s %s="%s"' % (self.result, k, v)
endTag = '</%s>' % tag
- if VALID_TAGS.get(tag):
+ if self.valid_tags.get(tag):
self.result = self.result + '>'
else:
self.result = self.result + ' />'
- elif NASTY_TAGS.get(tag):
+ elif self.nasty_tags.get(tag):
msg = _(u"Dynamic tag '${tag}' not allowed.",
mapping={'tag': tag})
raise IllegalHTML(msg)
@@ -355,7 +359,7 @@
def unknown_endtag(self, tag):
- if VALID_TAGS.get(tag):
+ if self.valid_tags.get(tag):
self.result = "%s</%s>" % (self.result, tag)
remTag = '</%s>' % tag
@@ -365,7 +369,14 @@
def scrubHTML( html ):
""" Strip illegal HTML tags from string text.
+
+ o Prefer a utility, if registered.
"""
+ scrubber = queryUtility(IHTMLScrubber)
+
+ if scrubber is not None:
+ return scrubber.scrub(html)
+
parser = StrippingParser()
parser.feed( html )
parser.close()
More information about the CMF-checkins
mailing list