[Zope-Checkins] CVS: Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters - doc.py:1.1.2.1 pdf.py:1.1.2.1 ps.py:1.1.2.1

Andreas Jung andreas@digicool.com
Thu, 28 Feb 2002 17:49:43 -0500


Update of /cvs-repository/Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters
In directory cvs.zope.org:/tmp/cvs-serv28243

Added Files:
      Tag: ajung-textindexng-branch
	doc.py pdf.py ps.py 
Log Message:
added some new converters


=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/doc.py ===
# WinWord converter
#
# $Id: doc.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $

import tempfile
from os import popen, remove
from Globals import package_home
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter

# `path` was referenced without ever being imported, which made this module
# fail with NameError on import; bring it into scope right here.
from os import path

# Location of the wvWare configuration file 'wvText.xml', resolved relative
# to the directory returned by package_home() for this module's globals.
wvConf_file = path.join(package_home(globals()), 'wvText.xml')

class Converter(BaseConverter):
    """Convert Microsoft Word documents to text via the external wvWare tool."""

    # MIME types this converter declares itself responsible for.
    content_type = ('application/msword','application/ms-word','application/vnd.ms-word')
    content_description = "Microsoft Word"
    # Name of the external program run by convert().
    depends_on = 'wvWare'

    def convert(self, doc):
        """Convert WinWord document to raw text.

        ``doc`` is the raw document data. It is written unchanged to a
        temporary file, wvWare is run on that file with the ``wvText.xml``
        configuration, and everything wvWare prints on stdout is returned.
        stderr of the tool is discarded and its exit status is not checked,
        so a failing or missing wvWare yields whatever (possibly empty)
        output was produced.
        """
        # Local import: the module itself only pulls popen/remove from os.
        from os import fdopen

        # mkstemp() creates the file atomically, avoiding the name race of
        # tempfile.mktemp().
        fd, tmp_name = tempfile.mkstemp()
        try:
            # Word files are binary; write them in binary mode so no
            # newline translation can corrupt the data.
            tmp_file = fdopen(fd, 'wb')
            try:
                tmp_file.write(doc)
            finally:
                # Make sure the data is flushed before wvWare reads it.
                tmp_file.close()

            pipe = popen('wvWare -x %s %s 2> /dev/null' % (wvConf_file, tmp_name))
            try:
                text = pipe.read()
            finally:
                pipe.close()
        finally:
            # Always clean up the temporary file, even on errors.
            remove(tmp_name)

        return text



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/pdf.py ===
# pdf converter
#
# $Id: pdf.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $

import tempfile
from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter

class Converter(BaseConverter):
    """Convert PDF documents to text via the external pdftotext tool."""

    # MIME types this converter declares itself responsible for.
    content_type = ('application/pdf',)
    content_description = "Adobe Acrobat PDF"
    # Name of the external program run by convert().
    depends_on = 'pdftotext'

    def convert(self, doc):
        """Convert pdf data to raw text.

        ``doc`` is the raw PDF data. It is written unchanged to a temporary
        file, ``pdftotext <file> -`` is run on it, and everything the tool
        prints on stdout is returned. The exit status of the tool is not
        checked, so a failing or missing pdftotext yields whatever
        (possibly empty) output was produced.
        """
        # Local import: the module itself only pulls popen/remove from os.
        from os import fdopen

        # mkstemp() creates the file atomically, avoiding the name race of
        # tempfile.mktemp().
        fd, tmp_name = tempfile.mkstemp()
        try:
            # PDF is a binary format; write it in binary mode so no
            # newline translation can corrupt the data.
            tmp_file = fdopen(fd, 'wb')
            try:
                tmp_file.write(doc)
            finally:
                # Make sure the data is flushed before pdftotext reads it.
                tmp_file.close()

            pipe = popen('pdftotext %s -' % tmp_name)
            try:
                text = pipe.read()
            finally:
                pipe.close()
        finally:
            # Always clean up the temporary file, even on errors.
            remove(tmp_name)

        return text



=== Added File Zope/lib/python/Products/PluginIndexes/TextIndexNG/converters/ps.py ===
# PostScript converter
#
# $Id: ps.py,v 1.1.2.1 2002/02/28 22:49:43 andreasjung Exp $

import tempfile
from os import popen, remove
from Products.PluginIndexes.TextIndexNG.BaseConverter import BaseConverter

class Converter(BaseConverter):
    """Convert PostScript documents to text via the external ps2ascii tool."""

    # MIME types this converter declares itself responsible for.
    content_type = ('application/postscript',)
    content_description = "Adobe Postscript Document"
    # Name of the external program run by convert().
    depends_on = 'ps2ascii'

    def convert(self, doc):
        """Convert postscript data to raw text.

        ``doc`` is the raw PostScript data. It is written unchanged to a
        temporary file, ``ps2ascii <file> -`` is run on it, and everything
        the tool prints on stdout is returned. The exit status of the tool
        is not checked, so a failing or missing ps2ascii yields whatever
        (possibly empty) output was produced.
        """
        # Local import: the module itself only pulls popen/remove from os.
        from os import fdopen

        # mkstemp() creates the file atomically, avoiding the name race of
        # tempfile.mktemp().
        fd, tmp_name = tempfile.mkstemp()
        try:
            # PostScript may embed binary sections; write in binary mode so
            # the data reaches ps2ascii byte-for-byte.
            tmp_file = fdopen(fd, 'wb')
            try:
                tmp_file.write(doc)
            finally:
                # Make sure the data is flushed before ps2ascii reads it.
                tmp_file.close()

            pipe = popen('ps2ascii %s -' % tmp_name)
            try:
                text = pipe.read()
            finally:
                pipe.close()
        finally:
            # Always clean up the temporary file, even on errors.
            remove(tmp_name)

        return text