[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - html.py:1.1.2.2 null.py:1.1.2.2
Andreas Jung
andreas@digicool.com
Wed, 27 Feb 2002 19:18:30 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv11780
Modified Files:
Tag: ajung-textindexng-branch
html.py null.py
Log Message:
rewrote converters as classes
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/html.py 1.1.2.1 => 1.1.2.2 ===
# $Id$
-import re
+from sgmllib import SGMLParser
+from types import StringType
+from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
-reg = re.compile('<.*?>')
-def convert(s):
- return reg.sub('',s)
+# Code taken from Dieter Maurer's CatalogSupport Module
+# http://www.handshake.de/~dieter/pyprojects/zope
+# Thank you!
+class _StripTagParser(SGMLParser):
+ '''SGML Parser removing any tags and translating HTML entities.'''
+
+ from htmlentitydefs import entitydefs
+
+ data= None
+
+ def handle_data(self,data):
+ if self.data is None: self.data=[]
+ self.data.append(data)
+
+ def __str__(self):
+ if self.data is None: return ''
+ return ''.join(self.data)
+
+
+class Converter(BaseConverter):
+
+ content_type = ('text/html',)
+ content_description = "Converter HTML to ASCII"
+
+ def convert(self, s):
+ """Convert html data to raw text"""
+
+ p = _StripTagParser()
+ p.feed(s)
+ p.close()
+
+ return str(p)
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/null.py 1.1.2.1 => 1.1.2.2 ===
+# a stupid null converter
#
# $Id$
-def convert(s):
- return s
+from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
+
+class Converter(BaseConverter):
+
+ content_type = ('*',)
+ content_description = "Null converter"
+
+ def convert(self, s):
+ return s
+
+
+