[CMF-checkins] CVS: CMF - PDFFile.py:1.1 README.txt:1.1 __init__.py:1.1 pypdf.py:1.1 version.txt:1.1

jack@digicool.com jack@digicool.com
Wed, 30 May 2001 14:51:33 -0400 (EDT)


Update of /cvs-repository/Packages/Products/DCProject/CMF_PDF_Files
In directory korak.digicool.com:/tmp/cvs-serv13300

Added Files:
	PDFFile.py README.txt __init__.py pypdf.py version.txt 
Log Message:
Adding CMF_PDF_Files Product



--- Added File PDFFile.py in package Packages/Products/DCProject ---
# PDFFile.py
# John Platten
# 03.06.01
 
"""
PDFFile: a CMF portal content type for PDF documents.

Defines the PDFFile content class and its addPDFFile factory.  The text
and HTML renditions of a document are produced by running the external
'pdftotext' and 'pdftohtml' programs on a temporary copy of the data.
"""

# Permission name; not referenced by the code in this module.
ADD_CONTENT_PERMISSION = 'Add portal content'

# When true, the code in this module prints diagnostic traces to stdout.
OLE_DEBUG = 1

# Shell command templates, built on first use and then cached here:
#   command_str  - 'pdftotext' invocation (set in PDFFile.SearchableText)
#   command2_str - 'pdftohtml' invocation (set in PDFFile.viewHTML)
command_str = None
command2_str = None

from Globals import HTMLFile, HTML
from Products.CMFCore.PortalContent import PortalContent
from Products.CMFDefault.Discussions import Discussable
import Globals
from Products.CMFDefault.DublinCore import DefaultDublinCoreImpl

from Products.CMFCore import CMFCorePermissions
from Products.CMFCore.WorkflowCore import WorkflowAction, afterCreate


import OFS.Image

import os
import tempfile

def addPDFFile( self
           , id
           , title=''
           , file=''
           , content_type=''
           , precondition=''
           , subject=()
           , description=''
           , contributors=()
           , effective_date=None
           , expiration_date=None
           , format='text/html'
           , language='en-US'
           , rights=''
           ):
    """
    Add a PDFFile to this container and upload 'file' into it.
    """

    # Let cookId work out the id and title when they were not given
    # explicitly.
    id, title = OFS.Image.cookId(id, title, file)

    container = self.this()

    # Build the object with empty data; the Dublin Core metadata goes
    # straight to the constructor.
    new_pdf = PDFFile( id, title, '', content_type, precondition
                     , subject, description, contributors
                     , effective_date, expiration_date
                     , format, language, rights )

    # Store the new instance in the container.
    container._setObject(id, new_pdf)

    # Upload the data into the stored object (looked up again through
    # the container) rather than through the constructor.
    container._getOb(id).manage_upload(file)

    # Notify the workflow machinery about the newly created object.
    afterCreate(container._getOb(id))

class PDFFile( OFS.Image.File
          , PortalContent
          , DefaultDublinCoreImpl
          ):
    """
        A Portal-managed File
    """

    # The order of base classes is very significant in this case.
    # Image.File does not store it's id in it's 'id' attribute.
    # Rather, it has an 'id' method which returns the contents of the
    # instnace's __name__ attribute.  Inheriting in the other order
    # obscures this method, resulting in much pulling of hair and
    # gnashing of teeth and fraying of nerves.  Don't do it.
    #
    # Really.
    
    meta_type='PDFFile'
    effective_date = expiration_date = None
    _isDiscussable = 1
    icon = PortalContent.icon

    __ac_permissions__ = (
        (CMFCorePermissions.View, ('',)),
        (CMFCorePermissions.ModifyPortalContent, ('edit',)),
        )    
                             
    def __init__( self
                , id
                , title=''
                , file=''
                , content_type=''
                , precondition=''
                , subject=()
                , description=''
                , contributors=()
                , effective_date=None
                , expiration_date=None
                , format='text/html'
                , language='en-US'
                , rights=''
                ):
        OFS.Image.File.__init__( self, id, title, file
                               , content_type, precondition )
        DefaultDublinCoreImpl.__init__( self, title, subject, description
                               , contributors, effective_date, expiration_date
                               , format, language, rights )
    
    #--------------------------------------------------------------------------#
    def SearchableText(self):
    #--------------------------------------------------------------------------#
        """
        SeachableText is used for full text seraches of a portal.  It
        should return a concatanation of all useful text.
        """
        val=""
        try:

            fnameDoc = tempfile.mktemp()
            if OLE_DEBUG: print ":PDFFile:SearchableText:fnameDoc:", fnameDoc

            fnameTxt = tempfile.mktemp()
            if OLE_DEBUG: print ":PDFFile:SearchableText:fnameTxt:", fnameTxt

            f = open(fnameDoc,"wb")
            f.write(str(self.data))
            f.close()

            global command_str
            if command_str == None:
                if os.environ.has_key('XPDF_LOCATION'):
                    command_str = os.environ['XPDF_LOCATION'] + "/pdftotext %s %s"
                else:
                    command_str = "./Products/CMF_PDF_Files/pdftotext %s %s"
            command = command_str % (fnameDoc, fnameTxt)
            if OLE_DEBUG: print ":PDFFile:SearchableText:command:", command

            err = os.system(command)
            if OLE_DEBUG: print ":PDFFile:SearchableText:err:", err

            f = open(fnameTxt, "rb")
            val = f.read()
            f.close()

            if OLE_DEBUG: print ":PDFFile.py:SearchableText:len(val):", len(val)

            os.remove(fnameDoc)
            os.remove(fnameTxt)

        except:
            pass

        return "%s %s %s" % (self.title, self.description, val)

    #--------------------------------------------------------------------------#
    def listActions(self, info):
    #--------------------------------------------------------------------------#

        content_url = info.content_url
        return ( 
                   { 'name'         : 'View HTML'
                   , 'url'          :  content_url + '/viewHTMLForm'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Edit'
                   , 'url'          :  content_url + '/editForm'
                   , 'permissions'  : ( 'Modify portal content', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Metadata'
                   , 'url'          :  content_url + '/metadata_edit_form'
                   , 'permissions'  : ( 'Modify portal content', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Download'
                   , 'url'          :  content_url + '/download'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'View statistics'
                   , 'url'          :  content_url + '/view'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
               )

    def manage_afterAdd(self, item, container):
        """Both of my parents have an afterAdd method"""
        OFS.Image.File.manage_afterAdd(self, item, container)
        PortalContent.manage_afterAdd(self, item, container)

    def manage_beforeDelete(self, item, container):
        """Both of my parents have a beforeDelete method"""
        PortalContent.manage_beforeDelete(self, item, container)
        OFS.Image.File.manage_beforeDelete(self, item, container)

    def edit(self, precondition='', file=''):
        """
        Perform changes for user
        """
        if precondition: self.precondition = precondition
        elif self.precondition: del self.precondition

        if file.filename != '' and file.read() != '':
            self.manage_upload(file)

        self.setFormat(self.content_type)
        #self.reindexObject()
        
    edit = WorkflowAction(edit)

    def download(self, REQUEST, RESPONSE):
        """
        Download this item.  Calls OFS.Image.File.index_html to perform the
        actual transfer after first setting Content-Disposition to suggest
        a filename.
        """

        RESPONSE.setHeader('Content-Disposition',
                           'attachment; filename="%s"' % self.id())
        return OFS.Image.File.index_html(self, REQUEST, RESPONSE)

    editForm = HTMLFile('dtml/pdfFileEdit', globals())
    viewHTMLForm = HTMLFile('dtml/pdfFileViewHTML', globals())
    view = HTMLFile('dtml/pdfFileView', globals())

    index_html = view
    #index_html = download

    #--------------------------------------------------------------------------#
    def viewHTML(self):
    #--------------------------------------------------------------------------#

        fnameDoc = tempfile.mktemp()
        fnameHtml = tempfile.mktemp()
        
        f = open(fnameDoc,"wb")
        f.write(str(self.data))
        f.close()

        global command2_str
        if command2_str == None:
            if os.environ.has_key('PDF2HTML_LOCATION'):
                command2_str = os.environ['PDF2HTML_LOCATION'] + "/pdftohtml.bin -noframes %s %s"
            else:
                command2_str = "./Products/CMF_PDF_Files/pdftohtml -noframes %s %s"
        command = command2_str % (fnameDoc, fnameHtml)
        os.system(command)
 
        f = open(fnameHtml+".html","rb")
        val = f.read()
        f.close()

        if OLE_DEBUG: print ":PDFFile.py:viewHTML:len(val):", len(val)
        
        os.remove(fnameDoc)
        os.remove(fnameHtml+".html")
        
        return val

    #--------------------------------------------------------------------------#
    ##def __call__(self, REQUEST, **kw):
    #--------------------------------------------------------------------------#
    ##return apply(self.view, (self, REQUEST), kw)

    #--------------------------------------------------------------------------#
    def PUT(self, REQUEST, RESPONSE):
    #--------------------------------------------------------------------------#
        """Handle HTTP(and presumably FTP?)PUT requests"""
        
        try:
            self.dav__init(REQUEST, RESPONSE)

            headers = {}
            for k, v in self.getMetadataHeaders():
                headers[k] = v

            if OLE_DEBUG: print "\n"
            if OLE_DEBUG: print "\n:BEFORE:\n"

            keys = headers.keys()
            keys.sort()
            for i in keys:
                if OLE_DEBUG: print "headers[", i, "]:", headers[i]
                continue
            
            body = REQUEST.get('BODY', '')

            """
            if not headers.has_key('Format'): # not supplied in headers
                print "\n:not headers.has_key('Format'):\n"
                pass
            """
            
            import string
            if OLE_DEBUG: print "self.__name__:", self.__name__
            found = string.find(self.__name__, '~')
            if OLE_DEBUG: print "found:", found

            sniffFmt, enc = OFS.content_types.guess_content_type(self.__name__, body)

            format = REQUEST.get_header('content-type', sniffFmt)
            if OLE_DEBUG: print "format:", format
            headers['Format'] = format
            
            if headers['Format'] in ( 'application/pdf',):
                try:
                    from Products.CMF_PDF_Files.pypdf import getHeaders
                                        
                    headers = getHeaders(body, headers)

                    if OLE_DEBUG: print "\n"
                    if OLE_DEBUG: print "\n:AFTER:\n"

                    keys = headers.keys()
                    keys.sort()
                    for i in keys:
                        if OLE_DEBUG: print "headers[", i, "]:", headers[i]
                        continue
                    
                except:
                    if OLE_DEBUG: print "\n:ERROR:pylibole2.getHeaders(body):FAILED:\n"
                    pass

            #import pdb; pdb.set_trace()
            
            self.editMetadata( contributors = headers['Contributors']
                             , description = headers['Description']
                             , effective_date = headers['Effective_date']
                             , expiration_date = headers['Expiration_date']
                             , format = headers['Format']
                             , language = headers['Language']
                             , rights = headers['Rights']
                             , subject = headers['Subject']
                             , title = headers['Title']
                             )
            
            self.manage_upload(body)
            self.reindexObject()

            RESPONSE.setStatus(204)
            return RESPONSE
            
        except:
            import traceback
            traceback.print_exc()
            raise

# Run Zope's default class initialisation on PDFFile now that the class
# body is complete.
Globals.default__class_init__(PDFFile)

# Register the class and its factory as portal content.
from Products.CMFCore.register import registerPortalContent
registerPortalContent(PDFFile,                    # The class to be registered
                      constructors=(addPDFFile,), # Its factory (constructor)
                      action='Wizards/PDFFile',   # The URL of its add interface
                      icon="pdf.gif",         # Filename to take icon from
                      productGlobals=globals())

--- Added File README.txt in package Packages/Products/DCProject ---
Follow these steps to install the CMF_PDF_Files Product:

	1: Untar the package.

	2: Load and install the latest version of 'xpdf' from 
		http://www.foolabs.com/xpdf/

	3: Load and install the latest version of 'pdftohtml' from
		http://www.ra.informatik.uni-stuttgart.de/~gosho/pdftohtml/

	4: Set up the following two Environment Variables:

		XPDF_LOCATION - the directory in which 'pdftotext' and 'pdfinfo' are found.
		PDF2HTML_LOCATION - the directory in which 'pdftohtml' is found (the product runs it as 'pdftohtml.bin').

You can restart Zope now.


--- Added File __init__.py in package Packages/Products/DCProject ---
#------------------------------------------------------------------------------#
# __init__.py
# John Platten
# 03.06.01
#------------------------------------------------------------------------------#

# Permission passed to utils.ContentInit in initialize() below.
ADD_CONTENT_PERMISSION = 'Add portal content'

import PDFFile
from Products.CMFCore import utils
import Products.CMFCore

# The class and constructor handed to the CMF registration helpers.
bases = (PDFFile.PDFFile,)
contentClasses = ( PDFFile.PDFFile, )
contentConstructors = ( PDFFile.addPDFFile, )

import sys
# The module object of this package, looked up under its own name.
this_module = sys.modules[ __name__ ]

# First registration phase, run at import time; its result is passed to
# utils.initializeBasesPhase2() inside initialize().
z_bases = utils.initializeBasesPhase1(bases, this_module)

#------------------------------------------------------------------------------#
def initialize(context):
    """
    Register the PDFFile content type.

    'context' is handed to the CMF registration helpers and is also used
    to register the product's help.  Presumably Zope calls this function
    when the product is loaded.
    """

    # Description of the 'PDF File' portal type and its actions.
    # NOTE(review): the 'Edit' action points at 'file_edit_form', while
    # PDFFile.listActions uses 'editForm' -- confirm which is intended.
    factory_type_information = (
        {'id': 'PDF File', 'meta_type': 'PDFFile', 'description':
         'File objects of Adobe Acrobat PDF format.',
         'product':'CMF_PDF_Files', 'factory':'addPDFFile', 'icon': 'PDFFile.gif',
         'immediate_view':'metadata_edit_form', 'actions':
         ({'name': 'View statistics',
           'action': 'view',
           'permissions': ('View',)},
          {'name': 'Download',
           'action': 'download',
           'permissions': ('View',)},
          {'name': 'Edit',
           'action': 'file_edit_form',
           'permissions': ('Modify portal content',)},
          {'name': 'Metadata',
           'action': 'metadata_edit_form',
           'permissions': ('Modify portal content',)},
          ),
         },
        )

    # Second registration phase for the bases prepared at import time.
    utils.initializeBasesPhase2(z_bases, context)

    utils.ContentInit( 'CMF PDF Content'
                     , content_types=contentClasses
                     , permission=ADD_CONTENT_PERMISSION
                     , extra_constructors=contentConstructors
                     , fti=factory_type_information
                     ).initialize( context )

    utils.registerIcon(PDFFile.PDFFile,
                       'images/PDFFile.gif', globals())
    context.registerHelp()

    # Associate the 'application/pdf' content type with PDFFile.
    # NOTE(review): only 'Products.CMFCore' is imported by this module;
    # presumably the PortalFolder submodule has already been loaded by
    # the time this runs -- confirm.
    Products.CMFCore.PortalFolder.addPortalTypeHandler('application/pdf', PDFFile.PDFFile)

#------------------------------------------------------------------------------#

--- Added File pypdf.py in package Packages/Products/DCProject ---
#------------------------------------------------------------------------------#
# pypdf.py
# John Platten
# 03.05.01
#------------------------------------------------------------------------------#

import os
import tempfile
from string import strip

# When true, getHeaders() prints diagnostic traces to stdout.
OLE_DEBUG = 1
from Products.CMFDefault.utils import parseHeadersBody
#------------------------------------------------------------------------------#
def getHeaders(body, headers):
#------------------------------------------------------------------------------#

# STUFF HEADERS
    try:
        if OLE_DEBUG: print "Stuffing headers"
        fnameDoc = tempfile.mktemp()
        if OLE_DEBUG: print ":pypdf:getHeaders:fnameDoc:", fnameDoc

        fnameTxt = tempfile.mktemp()
        if OLE_DEBUG: print ":pypdf:getHeaders:fnameTxt:", fnameTxt

        f = open(fnameDoc,"w")
        f.write(body)
        f.close()

        global command_str
        if command_str == None:
            if os.environ.has_key('XPDF_LOCATION'):
                command_str = os.environ['XPDF_LOCATION'] + "/pdfinfo %s > %s"
            else:
                command_str = "./Products/CMF_PDF_Files/pdfinfo %s > %s"
        command = command_str % (fnameDoc, fnameTxt)
        if OLE_DEBUG: print ":pypdf:getHeaders:command:", command

        err = os.system(command)
        if OLE_DEBUG: print ":pypdf:getHeaders:err:", err

        f = open(fnameTxt,"r")
        val = f.read()
        if OLE_DEBUG: print ":pypdf:getHeaders:val:", val
        f.close()

        os.remove(fnameDoc)
        os.remove(fnameTxt)

        newHTuple = parseHeadersBody(val, headers)
        if OLE_DEBUG: print ":pypdf:getHeaders:newHTuple:", newHTuple
        newHeaders = newHTuple[0]
        if OLE_DEBUG: print ":pypdf:getHeaders:newHeaders:", newHeaders

        if newHeaders.has_key('Author'):
            headers['Contributors'] = strip(newHeaders['Author'])

        if newHeaders.has_key('Keywords'):
            headers['Description'] = strip(newHeaders['Keywords'])

        if newHeaders.has_key('Creator'):
            headers['Publisher'] = strip(newHeaders['Creator'])
        else:
            headers['Publisher'] = ''

        if newHeaders.has_key('Producer'):
            headers['Publisher'] = headers['Publisher'] + strip(newHeaders['Producer'])

        if newHeaders.has_key('Subject'):
            headers['Subject'] = strip(newHeaders['Subject'])

        if newHeaders.has_key('Title'):
            headers['Title'] = strip(newHeaders['Title'])

    except:
        if OLE_DEBUG: print "\n:ERROR:Stuffing Headers:FAILED:\n"
        return headers

    return headers

#------------------------------------------------------------------------------#

--- Added File version.txt in package Packages/Products/DCProject ---
1.0beta



--- Added File PDFFile.py in package CMF ---
# PDFFile.py
# John Platten
# 03.06.01
 
"""
PDFFile: a CMF portal content type for PDF documents.

Defines the PDFFile content class and its addPDFFile factory.  The text
and HTML renditions of a document are produced by running the external
'pdftotext' and 'pdftohtml' programs on a temporary copy of the data.
"""

# Permission name; not referenced by the code in this module.
ADD_CONTENT_PERMISSION = 'Add portal content'

# When true, the code in this module prints diagnostic traces to stdout.
OLE_DEBUG = 1

# Shell command templates, built on first use and then cached here:
#   command_str  - 'pdftotext' invocation (set in PDFFile.SearchableText)
#   command2_str - 'pdftohtml' invocation (set in PDFFile.viewHTML)
command_str = None
command2_str = None

from Globals import HTMLFile, HTML
from Products.CMFCore.PortalContent import PortalContent
from Products.CMFDefault.Discussions import Discussable
import Globals
from Products.CMFDefault.DublinCore import DefaultDublinCoreImpl

from Products.CMFCore import CMFCorePermissions
from Products.CMFCore.WorkflowCore import WorkflowAction, afterCreate


import OFS.Image

import os
import tempfile

def addPDFFile( self
           , id
           , title=''
           , file=''
           , content_type=''
           , precondition=''
           , subject=()
           , description=''
           , contributors=()
           , effective_date=None
           , expiration_date=None
           , format='text/html'
           , language='en-US'
           , rights=''
           ):
    """
    Add a PDFFile to this container and upload 'file' into it.
    """

    # Let cookId work out the id and title when they were not given
    # explicitly.
    id, title = OFS.Image.cookId(id, title, file)

    container = self.this()

    # Build the object with empty data; the Dublin Core metadata goes
    # straight to the constructor.
    new_pdf = PDFFile( id, title, '', content_type, precondition
                     , subject, description, contributors
                     , effective_date, expiration_date
                     , format, language, rights )

    # Store the new instance in the container.
    container._setObject(id, new_pdf)

    # Upload the data into the stored object (looked up again through
    # the container) rather than through the constructor.
    container._getOb(id).manage_upload(file)

    # Notify the workflow machinery about the newly created object.
    afterCreate(container._getOb(id))

class PDFFile( OFS.Image.File
          , PortalContent
          , DefaultDublinCoreImpl
          ):
    """
        A Portal-managed File
    """

    # The order of base classes is very significant in this case.
    # Image.File does not store it's id in it's 'id' attribute.
    # Rather, it has an 'id' method which returns the contents of the
    # instnace's __name__ attribute.  Inheriting in the other order
    # obscures this method, resulting in much pulling of hair and
    # gnashing of teeth and fraying of nerves.  Don't do it.
    #
    # Really.
    
    meta_type='PDFFile'
    effective_date = expiration_date = None
    _isDiscussable = 1
    icon = PortalContent.icon

    __ac_permissions__ = (
        (CMFCorePermissions.View, ('',)),
        (CMFCorePermissions.ModifyPortalContent, ('edit',)),
        )    
                             
    def __init__( self
                , id
                , title=''
                , file=''
                , content_type=''
                , precondition=''
                , subject=()
                , description=''
                , contributors=()
                , effective_date=None
                , expiration_date=None
                , format='text/html'
                , language='en-US'
                , rights=''
                ):
        OFS.Image.File.__init__( self, id, title, file
                               , content_type, precondition )
        DefaultDublinCoreImpl.__init__( self, title, subject, description
                               , contributors, effective_date, expiration_date
                               , format, language, rights )
    
    #--------------------------------------------------------------------------#
    def SearchableText(self):
    #--------------------------------------------------------------------------#
        """
        SeachableText is used for full text seraches of a portal.  It
        should return a concatanation of all useful text.
        """
        val=""
        try:

            fnameDoc = tempfile.mktemp()
            if OLE_DEBUG: print ":PDFFile:SearchableText:fnameDoc:", fnameDoc

            fnameTxt = tempfile.mktemp()
            if OLE_DEBUG: print ":PDFFile:SearchableText:fnameTxt:", fnameTxt

            f = open(fnameDoc,"wb")
            f.write(str(self.data))
            f.close()

            global command_str
            if command_str == None:
                if os.environ.has_key('XPDF_LOCATION'):
                    command_str = os.environ['XPDF_LOCATION'] + "/pdftotext %s %s"
                else:
                    command_str = "./Products/CMF_PDF_Files/pdftotext %s %s"
            command = command_str % (fnameDoc, fnameTxt)
            if OLE_DEBUG: print ":PDFFile:SearchableText:command:", command

            err = os.system(command)
            if OLE_DEBUG: print ":PDFFile:SearchableText:err:", err

            f = open(fnameTxt, "rb")
            val = f.read()
            f.close()

            if OLE_DEBUG: print ":PDFFile.py:SearchableText:len(val):", len(val)

            os.remove(fnameDoc)
            os.remove(fnameTxt)

        except:
            pass

        return "%s %s %s" % (self.title, self.description, val)

    #--------------------------------------------------------------------------#
    def listActions(self, info):
    #--------------------------------------------------------------------------#

        content_url = info.content_url
        return ( 
                   { 'name'         : 'View HTML'
                   , 'url'          :  content_url + '/viewHTMLForm'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Edit'
                   , 'url'          :  content_url + '/editForm'
                   , 'permissions'  : ( 'Modify portal content', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Metadata'
                   , 'url'          :  content_url + '/metadata_edit_form'
                   , 'permissions'  : ( 'Modify portal content', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'Download'
                   , 'url'          :  content_url + '/download'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
                 , { 'name'         : 'View statistics'
                   , 'url'          :  content_url + '/view'
                   , 'permissions'  : ( 'View', )
                   , 'category'     : 'object'
                   }
               )

    def manage_afterAdd(self, item, container):
        """Both of my parents have an afterAdd method"""
        OFS.Image.File.manage_afterAdd(self, item, container)
        PortalContent.manage_afterAdd(self, item, container)

    def manage_beforeDelete(self, item, container):
        """Both of my parents have a beforeDelete method"""
        PortalContent.manage_beforeDelete(self, item, container)
        OFS.Image.File.manage_beforeDelete(self, item, container)

    def edit(self, precondition='', file=''):
        """
        Perform changes for user
        """
        if precondition: self.precondition = precondition
        elif self.precondition: del self.precondition

        if file.filename != '' and file.read() != '':
            self.manage_upload(file)

        self.setFormat(self.content_type)
        #self.reindexObject()
        
    edit = WorkflowAction(edit)

    def download(self, REQUEST, RESPONSE):
        """
        Download this item.  Calls OFS.Image.File.index_html to perform the
        actual transfer after first setting Content-Disposition to suggest
        a filename.
        """

        RESPONSE.setHeader('Content-Disposition',
                           'attachment; filename="%s"' % self.id())
        return OFS.Image.File.index_html(self, REQUEST, RESPONSE)

    editForm = HTMLFile('dtml/pdfFileEdit', globals())
    viewHTMLForm = HTMLFile('dtml/pdfFileViewHTML', globals())
    view = HTMLFile('dtml/pdfFileView', globals())

    index_html = view
    #index_html = download

    #--------------------------------------------------------------------------#
    def viewHTML(self):
    #--------------------------------------------------------------------------#

        fnameDoc = tempfile.mktemp()
        fnameHtml = tempfile.mktemp()
        
        f = open(fnameDoc,"wb")
        f.write(str(self.data))
        f.close()

        global command2_str
        if command2_str == None:
            if os.environ.has_key('PDF2HTML_LOCATION'):
                command2_str = os.environ['PDF2HTML_LOCATION'] + "/pdftohtml.bin -noframes %s %s"
            else:
                command2_str = "./Products/CMF_PDF_Files/pdftohtml -noframes %s %s"
        command = command2_str % (fnameDoc, fnameHtml)
        os.system(command)
 
        f = open(fnameHtml+".html","rb")
        val = f.read()
        f.close()

        if OLE_DEBUG: print ":PDFFile.py:viewHTML:len(val):", len(val)
        
        os.remove(fnameDoc)
        os.remove(fnameHtml+".html")
        
        return val

    #--------------------------------------------------------------------------#
    ##def __call__(self, REQUEST, **kw):
    #--------------------------------------------------------------------------#
    ##return apply(self.view, (self, REQUEST), kw)

    #--------------------------------------------------------------------------#
    def PUT(self, REQUEST, RESPONSE):
    #--------------------------------------------------------------------------#
        """Handle HTTP(and presumably FTP?)PUT requests"""
        
        try:
            self.dav__init(REQUEST, RESPONSE)

            headers = {}
            for k, v in self.getMetadataHeaders():
                headers[k] = v

            if OLE_DEBUG: print "\n"
            if OLE_DEBUG: print "\n:BEFORE:\n"

            keys = headers.keys()
            keys.sort()
            for i in keys:
                if OLE_DEBUG: print "headers[", i, "]:", headers[i]
                continue
            
            body = REQUEST.get('BODY', '')

            """
            if not headers.has_key('Format'): # not supplied in headers
                print "\n:not headers.has_key('Format'):\n"
                pass
            """
            
            import string
            if OLE_DEBUG: print "self.__name__:", self.__name__
            found = string.find(self.__name__, '~')
            if OLE_DEBUG: print "found:", found

            sniffFmt, enc = OFS.content_types.guess_content_type(self.__name__, body)

            format = REQUEST.get_header('content-type', sniffFmt)
            if OLE_DEBUG: print "format:", format
            headers['Format'] = format
            
            if headers['Format'] in ( 'application/pdf',):
                try:
                    from Products.CMF_PDF_Files.pypdf import getHeaders
                                        
                    headers = getHeaders(body, headers)

                    if OLE_DEBUG: print "\n"
                    if OLE_DEBUG: print "\n:AFTER:\n"

                    keys = headers.keys()
                    keys.sort()
                    for i in keys:
                        if OLE_DEBUG: print "headers[", i, "]:", headers[i]
                        continue
                    
                except:
                    if OLE_DEBUG: print "\n:ERROR:pylibole2.getHeaders(body):FAILED:\n"
                    pass

            #import pdb; pdb.set_trace()
            
            self.editMetadata( contributors = headers['Contributors']
                             , description = headers['Description']
                             , effective_date = headers['Effective_date']
                             , expiration_date = headers['Expiration_date']
                             , format = headers['Format']
                             , language = headers['Language']
                             , rights = headers['Rights']
                             , subject = headers['Subject']
                             , title = headers['Title']
                             )
            
            self.manage_upload(body)
            self.reindexObject()

            RESPONSE.setStatus(204)
            return RESPONSE
            
        except:
            import traceback
            traceback.print_exc()
            raise

# Run Zope's default class initialisation on PDFFile now that the class
# body is complete.
Globals.default__class_init__(PDFFile)

# Register the class and its factory as portal content.
from Products.CMFCore.register import registerPortalContent
registerPortalContent(PDFFile,                    # The class to be registered
                      constructors=(addPDFFile,), # Its factory (constructor)
                      action='Wizards/PDFFile',   # The URL of its add interface
                      icon="pdf.gif",         # Filename to take icon from
                      productGlobals=globals())

--- Added File README.txt in package CMF ---
Follow these steps to install the CMF_PDF_Files Product:

	1: Untar the package.

	2: Download and install the latest version of 'xpdf' from
		http://www.foolabs.com/xpdf/

	3: Download and install the latest version of 'pdftohtml' from
		http://www.ra.informatik.uni-stuttgart.de/~gosho/pdftohtml/

	4: Set up the following two Environment Variables:

		XPDF_LOCATION - the directory in which 'pdftotext' and 'pdfinfo' are found.
		PDF2HTML_LOCATION - the directory in which 'pdftohtml' is found.

You can restart Zope now.


--- Added File __init__.py in package CMF ---
#------------------------------------------------------------------------------#
# __init__.py
# John Platten
# 03.06.01
#------------------------------------------------------------------------------#

# Permission name handed to utils.ContentInit() in initialize() below.
ADD_CONTENT_PERMISSION = 'Add portal content'

import PDFFile
from Products.CMFCore import utils
import Products.CMFCore

# Classes and constructors this product contributes.  'bases' goes to
# utils.initializeBasesPhase1() below; 'contentClasses' and
# 'contentConstructors' are passed to utils.ContentInit() in initialize().
bases = (PDFFile.PDFFile,)
contentClasses = ( PDFFile.PDFFile, )
contentConstructors = ( PDFFile.addPDFFile, )

import sys
# The module object for this package, looked up by its own name.
this_module = sys.modules[ __name__ ]

# First half of the two-phase base-class setup; the result is consumed by
# utils.initializeBasesPhase2() inside initialize().
# NOTE(review): what phase 1 does with 'this_module' is not visible here --
# confirm against Products.CMFCore.utils.
z_bases = utils.initializeBasesPhase1(bases, this_module)

#------------------------------------------------------------------------------#
def initialize(context):
    """
    Register the PDFFile content type, its icon and its help.

    context -- the object handed to this product initializer.  It is
               passed on to utils.initializeBasesPhase2() and to
               ContentInit.initialize(), and its registerHelp() method
               is called.

    Also registers PDFFile as the portal type handler for the
    'application/pdf' content type.
    """

    view_permissions = ('View',)
    modify_permissions = ('Modify portal content',)

    # Actions offered for a PDF File, in the order they are listed.
    actions = (
        {'name': 'View statistics',
         'action': 'view',
         'permissions': view_permissions},
        {'name': 'Download',
         'action': 'download',
         'permissions': view_permissions},
        {'name': 'Edit',
         'action': 'file_edit_form',
         'permissions': modify_permissions},
        {'name': 'Metadata',
         'action': 'metadata_edit_form',
         'permissions': modify_permissions},
        )

    factory_type_information = (
        {'id': 'PDF File',
         'meta_type': 'PDFFile',
         # User-visible text: the product name is spelled "Adobe".
         'description': 'File objects of Adobe Acrobat PDF format.',
         'product': 'CMF_PDF_Files',
         'factory': 'addPDFFile',
         'icon': 'PDFFile.gif',
         'immediate_view': 'metadata_edit_form',
         'actions': actions,
         },
        )

    # Second half of the base-class setup started at import time (z_bases
    # is computed at module level by utils.initializeBasesPhase1()).
    utils.initializeBasesPhase2(z_bases, context)

    utils.ContentInit( 'CMF PDF Content'
                     , content_types=contentClasses
                     , permission=ADD_CONTENT_PERMISSION
                     , extra_constructors=contentConstructors
                     , fti=factory_type_information
                     ).initialize( context )

    # Earlier registration call, disabled in favour of ContentInit above
    # and kept for reference only:
    #
    # context.registerClass(PDFFile.PDFFile,
    #                       constructors=(PDFFile.addPDFFile,
    #                       ('factory_type_information',
    #                        PDFFile.factory_type_information)
    #                       ),
    #                       icon='pdf.gif')

    utils.registerIcon(PDFFile.PDFFile,
                       'images/PDFFile.gif', globals())
    context.registerHelp()

    # NOTE(review): presumably this makes uploads of 'application/pdf'
    # content become PDFFile objects -- confirm against PortalFolder.
    Products.CMFCore.PortalFolder.addPortalTypeHandler('application/pdf', PDFFile.PDFFile)

#------------------------------------------------------------------------------#

--- Added File pypdf.py in package CMF ---
#------------------------------------------------------------------------------#
# pypdf.py
# John Platten
# 03.05.01
#------------------------------------------------------------------------------#

import os
import tempfile
from string import strip

# Debug switch: when true, getHeaders() prints a trace of each step it takes.
OLE_DEBUG = 1
from Products.CMFDefault.utils import parseHeadersBody
#------------------------------------------------------------------------------#
def getHeaders(body, headers):
    """
    Merge metadata reported by the external 'pdfinfo' program into headers.

    body    -- the raw PDF data; it is written to a temporary file so that
               'pdfinfo' can read it.
    headers -- mapping of metadata headers; updated in place and returned.

    The 'pdfinfo' output is parsed with parseHeadersBody() and copied into
    'headers' as follows:

        Author             -> Contributors
        Keywords           -> Description
        Creator + Producer -> Publisher ('' when neither is reported)
        Subject            -> Subject
        Title              -> Title

    This is a best-effort operation: on any failure the problem is
    reported (when OLE_DEBUG is set) and 'headers' is returned as it
    stands at that point.  No exception reaches the caller.
    """
    try:
        if OLE_DEBUG: print("Stuffing headers")

        val = _readPdfInfo(body)

        newHTuple = parseHeadersBody(val, headers)
        if OLE_DEBUG: print(":pypdf:getHeaders:newHTuple: %s" % (newHTuple,))
        newHeaders = newHTuple[0]
        if OLE_DEBUG: print(":pypdf:getHeaders:newHeaders: %s" % (newHeaders,))

        if newHeaders.has_key('Author'):
            headers['Contributors'] = strip(newHeaders['Author'])

        if newHeaders.has_key('Keywords'):
            headers['Description'] = strip(newHeaders['Keywords'])

        # Publisher is always set: Creator (or '') followed by Producer.
        if newHeaders.has_key('Creator'):
            headers['Publisher'] = strip(newHeaders['Creator'])
        else:
            headers['Publisher'] = ''

        if newHeaders.has_key('Producer'):
            headers['Publisher'] = headers['Publisher'] + strip(newHeaders['Producer'])

        if newHeaders.has_key('Subject'):
            headers['Subject'] = strip(newHeaders['Subject'])

        if newHeaders.has_key('Title'):
            headers['Title'] = strip(newHeaders['Title'])

    except:
        # Deliberately catch everything: metadata extraction must never
        # abort the upload.  Show what went wrong instead of hiding it.
        if OLE_DEBUG:
            print("\n:ERROR:Stuffing Headers:FAILED:\n")
            import traceback
            traceback.print_exc()
        return headers

    return headers


def _pdfinfoCommand(fnameDoc, fnameTxt):
    """
    Return the shell command that runs 'pdfinfo' on fnameDoc and redirects
    its output to fnameTxt.
    """
    global command_str
    # 'command_str' is not pre-declared at module level, so read it through
    # globals() rather than by name; a plain read raises NameError on the
    # first call.
    template = globals().get('command_str')
    if template is None:
        location = os.environ.get('XPDF_LOCATION')
        if location is not None:
            template = location + "/pdfinfo %s > %s"
        else:
            template = "./Products/CMF_PDF_Files/pdfinfo %s > %s"
        # Cache the template for subsequent calls.
        command_str = template
    return template % (fnameDoc, fnameTxt)


def _readPdfInfo(body):
    """
    Run 'pdfinfo' over the PDF data in body and return its text output.
    The temporary files are removed whether or not the run succeeds.
    """
    # NOTE: tempfile.mktemp() only picks a name and is open to races;
    # prefer tempfile.mkstemp() once the supported Python provides it.
    fnameDoc = tempfile.mktemp()
    if OLE_DEBUG: print(":pypdf:getHeaders:fnameDoc: %s" % (fnameDoc,))

    fnameTxt = tempfile.mktemp()
    if OLE_DEBUG: print(":pypdf:getHeaders:fnameTxt: %s" % (fnameTxt,))

    try:
        # PDF data is binary: write it untranslated.
        f = open(fnameDoc, "wb")
        try:
            f.write(body)
        finally:
            f.close()

        command = _pdfinfoCommand(fnameDoc, fnameTxt)
        if OLE_DEBUG: print(":pypdf:getHeaders:command: %s" % (command,))

        err = os.system(command)
        if OLE_DEBUG: print(":pypdf:getHeaders:err: %s" % (err,))

        f = open(fnameTxt, "r")
        try:
            val = f.read()
        finally:
            f.close()
        if OLE_DEBUG: print(":pypdf:getHeaders:val: %s" % (val,))
        return val
    finally:
        _removeIfPresent(fnameDoc)
        _removeIfPresent(fnameTxt)


def _removeIfPresent(path):
    """
    Delete path if it exists.  A failed delete is ignored so that it
    cannot mask the error that is already being reported.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass

#------------------------------------------------------------------------------#

--- Added File version.txt in package CMF ---
1.0beta