[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - html.py:1.1.2.2 null.py:1.1.2.2

Andreas Jung andreas@digicool.com
Wed, 27 Feb 2002 19:18:30 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv11780

Modified Files:
      Tag: ajung-textindexng-branch
	html.py null.py 
Log Message:
rewrote converters as classes


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/html.py 1.1.2.1 => 1.1.2.2 ===
 # $Id$
 
-import re
+from sgmllib import SGMLParser
+from types import StringType
+from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
 
-reg = re.compile('<.*?>')
 
-def convert(s):
-    return reg.sub('',s)
+# Code taken from Dieter Maurer's CatalogSupport Module
+# http://www.handshake.de/~dieter/pyprojects/zope
+# Thank you!
 
+class _StripTagParser(SGMLParser):
+  '''SGML Parser removing any tags and translating HTML entities.
+
+  Text handed to handle_data() is collected chunk by chunk; str() on the
+  parser returns the collected chunks joined into one string.
+  '''
+
+  # Importing inside the class body binds `entitydefs` as a class attribute.
+  # NOTE(review): presumably this is the table SGMLParser consults when
+  # resolving entity references -- confirm against sgmllib.
+  from htmlentitydefs import entitydefs
+
+  # Collected text chunks; stays None until the first handle_data() call.
+  data= None
+
+  def handle_data(self,data):
+    '''Append one chunk of text to the collected chunks.'''
+    # The list is created lazily on the instance, so parsers do not share
+    # a single list through the class attribute.
+    if self.data is None: self.data=[]
+    self.data.append(data)
+
+  def __str__(self):
+    '''Return the collected text joined together ('' if nothing was seen).'''
+    if self.data is None: return ''
+    return ''.join(self.data)
+
+
+class Converter(BaseConverter):
+    """Converter that reduces HTML input to the text collected by the parser."""
+
+    # NOTE(review): presumably the MIME types this converter handles; how
+    # BaseConverter uses these attributes is not visible here -- confirm.
+    content_type = ('text/html',)
+    content_description = "Converter HTML to ASCII"
+
+    def convert(self, s):
+        """Convert html data to raw text
+
+        s is fed to a fresh _StripTagParser and the text the parser
+        collected is returned as a string.
+        """
+
+        p = _StripTagParser()
+        p.feed(s)
+        # NOTE(review): close() presumably flushes input still buffered in
+        # the parser before the result is read -- confirm against sgmllib.
+        p.close()
+
+        return str(p)


=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/null.py 1.1.2.1 => 1.1.2.2 ===
+# A trivial null converter: returns its input unchanged
 #
 # $Id$
 
-def convert(s):
-    return s
+from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
+
+class Converter(BaseConverter):
+    """Converter that passes its input through unchanged."""
+
+    # NOTE(review): '*' presumably acts as a match-all content type --
+    # confirm against BaseConverter.
+    content_type = ('*',)
+    content_description = "Null converter"
+
+    def convert(self, s):
+        """Return s unchanged."""
+        return s
+
+
+