[CMF-checkins] CVS: CMF - Document.py:1.14 utils.py:1.4

Jeffrey Shell jeffrey@digicool.com
Thu, 24 May 2001 22:24:48 -0400 (EDT)


Update of /cvs-repository/CMF/CMFDefault
In directory korak.digicool.com:/home/jeffrey/InstanceHomes/cmf-dev/CMF/CMFDefault

Modified Files:
	Document.py utils.py 
Log Message:
Some interesting FTP/DAV polishes, such as:

 o In HTML, the value of the <title> tag takes precedence over
   <meta name="title"...>, and when rendering to the FTP/DAV Source port,
   <meta name="title"...> is filtered out of the Dublin Core meta tags sent
   to the client.

 o Dealt with an interesting case where a structured text document uploaded
   via FTP that contained a full HTML example would be mistaken for an HTML
   document, causing everything outside of the first HTML example found to
   be thrown away.




--- Updated File Document.py in package CMF --
--- Document.py	2001/05/24 20:39:40	1.13
+++ Document.py	2001/05/25 02:22:48	1.14
@@ -87,7 +87,7 @@
 
 ADD_CONTENT_PERMISSION = 'Add portal content'
 
-import Globals, StructuredText, string
+import Globals, StructuredText, string, utils
 from StructuredText.HTMLWithImages import HTMLWithImages
 from Globals import DTMLFile, InitializeClass
 from AccessControl import ClassSecurityInfo
@@ -235,9 +235,9 @@
         if format == 'html':
             parser = SimpleHTMLParser()
             parser.feed(text)
+            headers.update(parser.metatags)
             if parser.title:
                 headers['Title'] = parser.title
-            headers.update(parser.metatags)
             bodyfound = bodyfinder.search(text)
             if bodyfound:
                 cooked = body = bodyfound.group('bodycontent')
@@ -292,9 +292,8 @@
         """ Handle HTTP (and presumably FTP?) PUT requests """
         self.dav__init(REQUEST, RESPONSE)
         body = REQUEST.get('BODY', '')
-        bodyfound = bodyfinder.search(body)
         guessedformat = REQUEST.get_header('Content-Type', 'text/plain')
-        ishtml = (guessedformat == 'text/html') or (bodyfound is not None)
+        ishtml = (guessedformat == 'text/html') or utils.html_headcheck(body)
 
         if ishtml: self.setFormat('text/html')
         else: self.setFormat('text/plain')
@@ -307,7 +306,7 @@
     _htmlsrc = (
         '<html>\n <head>\n'
         ' <title>%(title)s</title>\n'
-        ' %(metatags)s'
+        '%(metatags)s\n'
         ' </head>\n'
         ' <body>\n%(body)s\n </body>\n'
         '</html>\n'
@@ -317,10 +316,17 @@
     def manage_FTPget(self):
         "Get the document body for FTP download (also used for the WebDAV SRC)"
         join = string.join
+        lower = string.lower
         hdrlist = self.getMetadataHeaders()
         if self.Format() == 'text/html':
-            hdrtext = join(map(lambda x: '<meta name="%s" content="%s" />' %(
-                x[0], x[1]), hdrlist), '\n')
+            hdrtext = ''
+            for name, content in hdrlist:
+                if lower(name) == 'title':
+                    continue
+                else:
+                    hdrtext = '%s\n <meta name="%s" content="%s" />' % (
+                        hdrtext, name, content)
+
             bodytext = self._htmlsrc % {
                 'title': self.Title(),
                 'metatags': hdrtext,

--- Updated File utils.py in package CMF --
--- utils.py	2001/05/11 03:41:43	1.3
+++ utils.py	2001/05/25 02:22:48	1.4
@@ -138,3 +138,18 @@
 
 bodyfinder = re.compile(r'<body.*?>(?P<bodycontent>.*?)</body>',
                         re.DOTALL|re.I)
+htfinder = re.compile(r'<html.*?>', re.DOTALL|re.I)
+
+def html_headcheck(html):
+    """ Returns 'true' if document looks HTML-ish enough """
+    if not htfinder.search(html):
+        return 0
+    lines = re.split(r'[\n\r]+?', html)
+    for line in lines:
+        line = strip(line)
+        if not line:
+            continue
+        elif lower(line[:5]) == '<html':
+            return 1
+        elif line[:2] not in ('<!', '<?'):
+            return 0