[CMF-checkins] CVS: CMF - Document.py:1.14 utils.py:1.4
Jeffrey Shell
jeffrey@digicool.com
Thu, 24 May 2001 22:24:48 -0400 (EDT)
Update of /cvs-repository/CMF/CMFDefault
In directory korak.digicool.com:/home/jeffrey/InstanceHomes/cmf-dev/CMF/CMFDefault
Modified Files:
Document.py utils.py
Log Message:
Some interesting FTP/DAV polishes, such as:
o In HTML, the value of the <title> tag takes precedence over
<meta name="title"...>, and when rendering to the FTP/DAV source port,
<meta name="title"...> is filtered out of the Dublin Core meta tags sent
to the client.
o Dealt with an interesting case where a full HTML example inside a
structured text document uploaded via FTP would cause the whole document
to be treated as HTML, which would throw away everything outside of the
first HTML example it found.
--- Updated File Document.py in package CMF --
--- Document.py 2001/05/24 20:39:40 1.13
+++ Document.py 2001/05/25 02:22:48 1.14
@@ -87,7 +87,7 @@
ADD_CONTENT_PERMISSION = 'Add portal content'
-import Globals, StructuredText, string
+import Globals, StructuredText, string, utils
from StructuredText.HTMLWithImages import HTMLWithImages
from Globals import DTMLFile, InitializeClass
from AccessControl import ClassSecurityInfo
@@ -235,9 +235,9 @@
if format == 'html':
parser = SimpleHTMLParser()
parser.feed(text)
+ headers.update(parser.metatags)
if parser.title:
headers['Title'] = parser.title
- headers.update(parser.metatags)
bodyfound = bodyfinder.search(text)
if bodyfound:
cooked = body = bodyfound.group('bodycontent')
@@ -292,9 +292,8 @@
""" Handle HTTP (and presumably FTP?) PUT requests """
self.dav__init(REQUEST, RESPONSE)
body = REQUEST.get('BODY', '')
- bodyfound = bodyfinder.search(body)
guessedformat = REQUEST.get_header('Content-Type', 'text/plain')
- ishtml = (guessedformat == 'text/html') or (bodyfound is not None)
+ ishtml = (guessedformat == 'text/html') or utils.html_headcheck(body)
if ishtml: self.setFormat('text/html')
else: self.setFormat('text/plain')
@@ -307,7 +306,7 @@
_htmlsrc = (
'<html>\n <head>\n'
' <title>%(title)s</title>\n'
- ' %(metatags)s'
+ '%(metatags)s\n'
' </head>\n'
' <body>\n%(body)s\n </body>\n'
'</html>\n'
@@ -317,10 +316,17 @@
def manage_FTPget(self):
"Get the document body for FTP download (also used for the WebDAV SRC)"
join = string.join
+ lower = string.lower
hdrlist = self.getMetadataHeaders()
if self.Format() == 'text/html':
- hdrtext = join(map(lambda x: '<meta name="%s" content="%s" />' %(
- x[0], x[1]), hdrlist), '\n')
+ hdrtext = ''
+ for name, content in hdrlist:
+ if lower(name) == 'title':
+ continue
+ else:
+ hdrtext = '%s\n <meta name="%s" content="%s" />' % (
+ hdrtext, name, content)
+
bodytext = self._htmlsrc % {
'title': self.Title(),
'metatags': hdrtext,
--- Updated File utils.py in package CMF --
--- utils.py 2001/05/11 03:41:43 1.3
+++ utils.py 2001/05/25 02:22:48 1.4
@@ -138,3 +138,18 @@
bodyfinder = re.compile(r'<body.*?>(?P<bodycontent>.*?)</body>',
re.DOTALL|re.I)
+htfinder = re.compile(r'<html.*?>', re.DOTALL|re.I)
+
+def html_headcheck(html):
+ """ Returns 'true' if document looks HTML-ish enough """
+ if not htfinder.search(html):
+ return 0
+ lines = re.split(r'[\n\r]+?', html)
+ for line in lines:
+ line = strip(line)
+ if not line:
+ continue
+ elif lower(line[:5]) == '<html':
+ return 1
+ elif line[:2] not in ('<!', '<?'):
+ return 0