[CMF-checkins] CVS: CMF/CMFCollector - WebTextDocument.py:1.1
Ken Manheimer
klm@zope.com
Thu, 8 Nov 2001 00:45:38 -0500
Update of /cvs-repository/CMF/CMFCollector
In directory cvs.zope.org:/tmp/cvs-serv5496
Added Files:
WebTextDocument.py
Log Message:
Package format_webtext in a Document-ish content type, to provide
CookedText caching of the formatting.
WebTextDocument is a Document derivative for presenting plain text on
the web.
- Paragraphs of contiguous lines at the left margin are flowed until hard
newlines.
- Indented and '>' cited lines are presented exactly, preserving whitespace.
- URLs (outside indented and cited literal regions) are turned into links.
- Character entities outside of linkified URLs are html-quoted.
This makes it easy to present both flowed paragraphs and source code (and
other literal text), without having to know and navigate the nuances of HTML
and/or structured text.
=== Added File CMF/CMFCollector/WebTextDocument.py ===
##############################################################################
# Copyright (c) 2001 Zope Corporation. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 1.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND
# FITNESS FOR A PARTICULAR PURPOSE.
##############################################################################
"""A Document derivative for presenting plain text on the web.
- Paragraphs of contiguous lines at the left margin are flowed until hard
newlines.
- Indented and '>' cited lines are presented exactly, preserving whitespace.
- URLs (outside indented and cited literal regions) are turned into links.
- Character entities outside of linkified URLs are html-quoted.
This makes it easy to present both flowed paragraphs and source code (and
other literal text), without having to know and navigate the nuances of HTML
and/or structured text."""
import os, urllib, string, re
from Globals import InitializeClass
from AccessControl import ClassSecurityInfo, getSecurityManager
from Acquisition import aq_base
import util # Collector utilities.
from Products.CMFDefault.Document import Document
from Products.CMFDefault.utils import SimpleHTMLParser, bodyfinder
from Products.CMFCore import CMFCorePermissions
from CollectorPermissions import *
# Type registration data for the 'WebText Document' content type: a tuple
# holding a single type-description mapping (identity, icon, description,
# owning product, and the View / Edit / Metadata actions with the permission
# each one requires).
factory_type_information = (
    {
        'id': 'WebText Document',
        'meta_type': 'WebText Document',
        'product': 'CMFCollector',
        'icon': 'document_icon.gif',
        'description': (
            'A document for simple text, with blank-line delimited'
            ' paragraphs and special (indented, cited text)'
            ' preformatting.'
        ),
        # XXX Register add method when blessed.
        'factory': None,
        'immediate_view': 'metadata_edit_form',
        # XXX May need its own forms, in order to inhibit formatting option.
        'actions': (
            {
                'name': 'View',
                'action': 'document_view',
                'permissions': (CMFCorePermissions.View,),
            },
            {
                'name': 'Edit',
                'action': 'document_edit_form',
                'permissions': (CMFCorePermissions.ModifyPortalContent,),
            },
            {
                'name': 'Metadata',
                'action': 'metadata_edit_form',
                'permissions': (CMFCorePermissions.ModifyPortalContent,),
            },
        ),
    },
)
def addWebTextDocument(self, id, title='', description='', text_format='',
                       text=''):
    """ Add a WebText Document

    Builds a WebTextDocument from the given id, title, description,
    text_format and text, and stores it in the container 'self' under 'id'
    via self._setObject().  Returns None.
    """
    # A leftover pdb.set_trace() used to sit here; it dropped the serving
    # process into an interactive debugger on every add, so it is removed.
    o = WebTextDocument(id, title=title, description=description,
                        text_format=text_format, text=text)
    self._setObject(id, o)
class WebTextDocument(Document):
    # A Document derivative whose native text format is 'webtext'.  Text in
    # that format is cooked with util.format_webtext(); every other format is
    # delegated to the Document base class.
    #
    # NOTE: deliberately no literal class docstring.  The assignment below
    # aliases the module documentation as the class documentation, and a
    # docstring here would be what the right-hand side resolves to instead.
    # (The original line was a bare `__doc__` expression, which evaluated the
    # name and threw the result away, leaving the class undocumented.)
    __doc__ = __doc__                   # Use the module documentation.

    meta_type = 'WebText Document'

    # Name of the native format, and the class-level default for instances.
    TEXT_FORMAT = 'webtext'
    text_format = TEXT_FORMAT

    # Fallback structured-text level handed to Document.handleText() when the
    # caller does not supply one.
    _stx_level = 0

    security = ClassSecurityInfo()

    def __init__(self, id, title='', description='', text_format='',
                 text=''):
        """Create the document.

        An empty 'text_format' selects the webtext default.
        """
        Document.__init__(self, id, title=title, description=description,
                          text_format=text_format or self.text_format,
                          text=text)
        # Record the requested format (or the webtext default) on the
        # instance, whatever the base-class constructor left there.
        self.text_format = text_format or self.TEXT_FORMAT

    security.declarePrivate('guessFormat')
    def guessFormat(self, text):
        """Infer inner document content type.

        Returns self.TEXT_FORMAT, 'text/html' or 'structured-text' when the
        registered text_format names one of those (case-insensitively);
        otherwise defers to Document.guessFormat() to sniff 'text'.
        """
        # Respect the registered text_format, if we can, else sniff.
        registered = self.text_format.lower()
        if registered == self.TEXT_FORMAT:
            return self.TEXT_FORMAT
        if registered == 'html':
            return 'text/html'
        if registered in ('stx', 'structuredtext',
                          'structured-text', 'structured_text'):
            return 'structured-text'
        return Document.guessFormat(self, text)

    def _edit(self, text_format, text, file='', safety_belt=''):
        """Edit through Document._edit(), then re-assert our text format.

        An empty 'text_format' falls back to the current format for the
        base-class call and to TEXT_FORMAT for the value stored afterwards.
        Returns None.
        """
        Document._edit(self, text_format or self.text_format,
                       text=text, file=file, safety_belt=safety_belt)
        # The document stubbornly insists on a text format it likes, despite
        # our explicit specification - set it back:
        self.text_format = text_format or self.TEXT_FORMAT

    security.declarePrivate('handleText')
    def handleText(self, text, format=None, stx_level=None):
        """Handle the raw text, returning headers, body, cooked, format

        'format' is guessed with guessFormat() when not given.  Anything other
        than the webtext format is handed to Document.handleText(), using
        self._stx_level when 'stx_level' is not supplied.
        """
        if not format:
            format = self.guessFormat(text)

        if format != self.TEXT_FORMAT:
            return Document.handleText(self, text, format=format,
                                       stx_level=stx_level or self._stx_level)

        # Webtext: headers come from the parser's metatags (plus its title,
        # if it has one).
        parser = SimpleHTMLParser()
        parser.feed(text)
        headers = parser.metatags
        if parser.title:
            headers['Title'] = parser.title

        # Body and cooked text stay empty when bodyfinder() yields nothing.
        body = cooked = ''
        bodyfound = bodyfinder(text)
        if bodyfound:
            body = bodyfound
            cooked = util.format_webtext(bodyfound)
        return headers, body, cooked, format

InitializeClass(WebTextDocument)