[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - doc.py:1.1.2.1 pdf.py:1.1.2.1 ps.py:1.1.2.1
Andreas Jung
andreas@digicool.com
Thu, 28 Feb 2002 17:49:43 -0500
Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv28243
Added Files:
Tag: ajung-textindexng-branch
doc.py pdf.py ps.py
Log Message:
added some new converters
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/doc.py ===
# WinWord converter
#
# $Id: doc.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $
import tempfile
from os import popen, remove
from Globals import package_home
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
# Location of the wvWare configuration file 'wvText.xml', resolved relative to
# the directory returned by package_home() for this module.
# `path` must be imported explicitly: the import above only pulls `popen` and
# `remove` out of `os`, so `path.join` would otherwise raise NameError on import.
from os import path
wvConf_file = path.join(package_home(globals()), 'wvText.xml')
class Converter(BaseConverter):
    """Converter that turns Microsoft Word documents into raw text.

    The actual conversion is delegated to the external ``wvWare`` program.
    """

    # Content (MIME) types associated with this converter
    content_type = ('application/msword','application/ms-word','application/vnd.ms-word')
    # Human-readable name of the document format
    content_description = "Microsoft Word"
    # Name of the external program run by convert()
    depends_on = 'wvWare'

    # convert() takes no `self`; it is declared static so that it can be
    # called with the document both on the class and on an instance (as a
    # plain method, an instance call would bind the instance itself to `doc`).
    @staticmethod
    def convert(doc):
        """Convert WinWord document to raw text.

        doc -- the document content; it is written to a temporary file
               which is then handed to ``wvWare``.

        Returns the text ``wvWare`` writes to its standard output (its
        standard error is discarded by the shell redirection).  The
        temporary file is removed even if writing or converting fails.
        """
        # Byte strings are written in binary mode so the document reaches the
        # tool unmodified; any other string type keeps a text-mode write.
        if isinstance(doc, bytes):
            mode = 'wb'
        else:
            mode = 'w'

        # NamedTemporaryFile creates and opens the file in one step, which
        # avoids the race between picking a name with mktemp() and opening it.
        tmp = tempfile.NamedTemporaryFile(mode=mode, delete=False)
        try:
            # Close (and thereby flush) the file before the tool reads it.
            with tmp:
                tmp.write(doc)
            with popen('wvWare -x %s %s 2> /dev/null' % (wvConf_file, tmp.name)) as pipe:
                return pipe.read()
        finally:
            remove(tmp.name)
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/pdf.py ===
# pdf converter
#
# $Id: pdf.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $
import tempfile
from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
class Converter(BaseConverter):
    """Converter that turns PDF documents into raw text.

    The actual conversion is delegated to the external ``pdftotext`` program.
    """

    # Content (MIME) types associated with this converter
    content_type = ('application/pdf',)
    # Human-readable name of the document format
    content_description = "Adobe Acrobat PDF"
    # Name of the external program run by convert()
    depends_on = 'pdftotext'

    # convert() takes no `self`; it is declared static so that it can be
    # called with the document both on the class and on an instance (as a
    # plain method, an instance call would bind the instance itself to `doc`).
    @staticmethod
    def convert(doc):
        """Convert pdf data to raw text.

        doc -- the PDF content; it is written to a temporary file which is
               then handed to ``pdftotext``.

        Returns the text ``pdftotext`` writes to its standard output.  The
        temporary file is removed even if writing or converting fails.
        """
        # Byte strings are written in binary mode so the document reaches the
        # tool unmodified; any other string type keeps a text-mode write.
        if isinstance(doc, bytes):
            mode = 'wb'
        else:
            mode = 'w'

        # NamedTemporaryFile creates and opens the file in one step, which
        # avoids the race between picking a name with mktemp() and opening it.
        tmp = tempfile.NamedTemporaryFile(mode=mode, delete=False)
        try:
            # Close (and thereby flush) the file before the tool reads it.
            with tmp:
                tmp.write(doc)
            with popen('pdftotext %s -' % tmp.name) as pipe:
                return pipe.read()
        finally:
            remove(tmp.name)
=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ps.py ===
# postscript converter
#
# $Id: ps.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $
import tempfile
from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter
class Converter(BaseConverter):
    """Converter that turns PostScript documents into raw text.

    The actual conversion is delegated to the external ``ps2ascii`` program.
    """

    # Content (MIME) types associated with this converter
    content_type = ('application/postscript',)
    # Human-readable name of the document format
    content_description = "Adobe Postscript Document"
    # Name of the external program run by convert()
    depends_on = 'ps2ascii'

    # convert() takes no `self`; it is declared static so that it can be
    # called with the document both on the class and on an instance (as a
    # plain method, an instance call would bind the instance itself to `doc`).
    @staticmethod
    def convert(doc):
        """Convert postscript data to raw text.

        doc -- the PostScript content; it is written to a temporary file
               which is then handed to ``ps2ascii``.

        Returns the text ``ps2ascii`` writes to its standard output.  The
        temporary file is removed even if writing or converting fails.
        """
        # Byte strings are written in binary mode so the document reaches the
        # tool unmodified; any other string type keeps a text-mode write.
        if isinstance(doc, bytes):
            mode = 'wb'
        else:
            mode = 'w'

        # NamedTemporaryFile creates and opens the file in one step, which
        # avoids the race between picking a name with mktemp() and opening it.
        tmp = tempfile.NamedTemporaryFile(mode=mode, delete=False)
        try:
            # Close (and thereby flush) the file before the tool reads it.
            with tmp:
                tmp.write(doc)
            with popen('ps2ascii %s -' % tmp.name) as pipe:
                return pipe.read()
        finally:
            remove(tmp.name)