[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - __init__.py:1.1.2.6 doc.py:1.1.2.4 html.py:1.1.2.3 null.py:1.1.2.4 pdf.py:1.1.2.4 ppt.py:1.1.2.3 ps.py:1.1.2.3
Andreas Jung
andreas@digicool.com
Wed, 20 Mar 2002 18:15:56 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv14765/converters
Modified Files:
Tag: ajung-textindexng-branch
__init__.py doc.py html.py null.py pdf.py ppt.py ps.py
Log Message:
Moved more logic into the Converter base class to make the converters smarter
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/__init__.py 1.1.2.5 => 1.1.2.6 ===
cv = cv[:-3]
mod = __import__(cv, globals(), globals(), __path__)
- converter = mod.Converter()
- for t in converter.getType():
- _converters[t] = converter
+
+ try:
+ converter = mod.Converter()
+ for t in converter.getType():
+ _converters[t] = converter
+ except:
+ continue
del converters
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/doc.py 1.1.2.3 => 1.1.2.4 ===
# $Id$
-import tempfile, os
+import os
from Globals import package_home
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
@@ -17,10 +17,8 @@
def convert(doc):
"""Convert WinWord document to raw text"""
- tmp_name = tempfile.mktemp()
- open(tmp_name,'w').write(doc)
+ tmp_name = self.saveFile(doc)
text = self.execute('wvWare -x %s %s 2> /dev/null' % (wvConf_file, tmp_name))
- os.remove(tmp_name)
return text
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/html.py 1.1.2.2 => 1.1.2.3 ===
# $Id$
-from sgmllib import SGMLParser
from types import StringType
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
-
-
-# Code taken from Dieter Maurer's CatalogSupport Module
-# http://www.handshake.de/~dieter/pyprojects/zope
-# Thank you!
-
-class _StripTagParser(SGMLParser):
- '''SGML Parser removing any tags and translating HTML entities.'''
-
- from htmlentitydefs import entitydefs
-
- data= None
-
- def handle_data(self,data):
- if self.data is None: self.data=[]
- self.data.append(data)
-
- def __str__(self):
- if self.data is None: return ''
- return ''.join(self.data)
-
+from StripTagParser import StripTagParser
class Converter(BaseConverter):
@@ -35,7 +14,7 @@
def convert(self, s):
"""Convert html data to raw text"""
- p = _StripTagParser()
+ p = StripTagParser()
p.feed(s)
p.close()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/null.py 1.1.2.3 => 1.1.2.4 ===
def convert(self, s):
return s
-
-
-
-
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/pdf.py 1.1.2.3 => 1.1.2.4 ===
# $Id$
-import tempfile
-from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
class Converter(BaseConverter):
@@ -15,10 +13,8 @@
def convert(doc):
"""Convert pdf data to raw text"""
- tmp_name = tempfile.mktemp()
- open(tmp_name,'w').write(doc)
+ tmp_name = self.saveFile(doc)
text = self.execute('pdftotext %s -' % tmp_name)
- remove(tmp_name)
return text
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ppt.py 1.1.2.2 => 1.1.2.3 ===
# $Id$
-import tempfile, os
-from sgmllib import SGMLParser
+import os
from Globals import package_home
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
+from StripTagParser import StripTagParser
wvConf_file = os.path.join(package_home(globals()), 'wvText.xml')
-class _StripTagParser(SGMLParser):
- '''SGML Parser removing any tags and translating HTML entities.'''
-
- from htmlentitydefs import entitydefs
-
- data= None
-
- def handle_data(self,data):
- if self.data is None: self.data=[]
- self.data.append(data)
-
- def __str__(self):
- if self.data is None: return ''
- return ''.join(self.data)
-
class Converter(BaseConverter):
@@ -35,12 +20,10 @@
def convert(doc):
"""Convert PowerPoint document to raw text"""
- tmp_name = tempfile.mktemp()
- open(tmp_name,'w').write(doc)
+ tmp_name = self.saveFile(doc)
text = self.execute('pptHtml %s 2> /dev/null' % tmp_name)
- os.remove(tmp_name)
- p = _StripTagParser()
+ p = StripTagParser()
p.feed(text)
p.close()
=== Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ps.py 1.1.2.2 => 1.1.2.3 ===
# $Id$
-import tempfile
-from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
class Converter(BaseConverter):
@@ -15,10 +13,8 @@
def convert(doc):
"""Convert postscript data to raw text"""
- tmp_name = tempfile.mktemp()
- open(tmp_name,'w').write(doc)
+ tmp_name = self.saveFile(doc)
text = self.execute('ps2ascii %s -' % tmp_name)
- remove(tmp_name)
return text