[Checkins] SVN: z3c.pt/trunk/ Preserve CDATA sections.

Malthe Borch mborch at gmail.com
Tue Aug 12 09:17:52 EDT 2008


Log message for revision 89724:
  Preserve CDATA sections.

Changed:
  U   z3c.pt/trunk/CHANGES.txt
  U   z3c.pt/trunk/src/z3c/pt/etree.py
  U   z3c.pt/trunk/src/z3c/pt/translation.py

-=-
Modified: z3c.pt/trunk/CHANGES.txt
===================================================================
--- z3c.pt/trunk/CHANGES.txt	2008-08-12 12:51:20 UTC (rev 89723)
+++ z3c.pt/trunk/CHANGES.txt	2008-08-12 13:17:52 UTC (rev 89724)
@@ -4,6 +4,8 @@
 Version 1.0dev
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+- Preserve CDATA sections. [malthe]
+
 - The Genshi interpolation operator ${} should not have its result
   escaped when used in the text or tail regions. [malthe]
 

Modified: z3c.pt/trunk/src/z3c/pt/etree.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/etree.py	2008-08-12 12:51:20 UTC (rev 89723)
+++ z3c.pt/trunk/src/z3c/pt/etree.py	2008-08-12 13:17:52 UTC (rev 89724)
@@ -20,6 +20,79 @@
         def tostring(self):
             return lxml.etree.tostring(self)
 
+        @property
+        def raw_text(self):
+            """Return raw text.
+
+            CDATA sections are returned in their original formatting;
+            the routine relies on the fact that ``tostring`` will
+            output CDATA sections even though they're not present in
+            the .text-attribute.
+            """
+
+            if self.text in ("", None):
+                return self.text
+
+            # serialize the element without its children, then restore them
+            elements = tuple(self)
+            del self[:]
+            xml = lxml.etree.tostring(self, encoding='utf-8', with_tail=False)
+            self.extend(elements)
+
+            # an empty twin (``<tag .../>``) is one byte longer than the start tag
+            element = parser.makeelement(self.tag, nsmap=self.nsmap)
+            for attr, value in self.items():
+                element.attrib[attr] = value
+            html = lxml.etree.tostring(element, encoding='utf-8')
+            text = xml[len(html)-1:xml.rfind('</')]
+            return text
+
+        @property
+        def raw_tail(self):
+            """Return raw tail.
+
+            CDATA sections are returned in their original formatting;
+            the routine relies on the fact that ``tostring`` will
+            output CDATA sections even though they're not present in
+            the .tail-attribute.  A ``ValueError`` is raised if the
+            namespace of the tag is not found in ``nsmap``.
+            """
+            if self.tail in ("", None):
+                return self.tail
+
+            # check the parent first, so children are never left detached
+            parent = self.getparent()
+            if parent is None:
+                return self.tail
+
+            elements = tuple(self)
+            del self[:]
+            length = len(lxml.etree.tostring(self, encoding='utf-8', with_tail=False))
+
+            # wrap element
+            index = parent.index(self)
+            element = parser.makeelement(self.tag, nsmap=self.nsmap)
+            element.append(self)
+            xml = lxml.etree.tostring(element, encoding='utf-8', with_tail=False)
+            self.extend(elements)
+            parent.insert(index, self)
+
+            ns = self.tag[self.tag.find('{')+1:self.tag.find('}')]
+            for prefix, namespace in self.nsmap.items():
+                if ns == namespace:
+                    if prefix is None:
+                        tag = len(self.tag) - len(ns)
+                    else:
+                        tag = len(self.tag) - len(ns) + len(prefix) + 1
+                    break
+            else:
+                raise ValueError(
+                    "Unable to determine tag length: %s." % self.tag)
+
+            # skip the wrapper start tag and the element; drop the wrapper end tag
+            tail = xml[length+tag:-tag-1]
+            return tail
+            
     element_factory = parser.makeelement
 
     def parse(body):

Modified: z3c.pt/trunk/src/z3c/pt/translation.py
===================================================================
--- z3c.pt/trunk/src/z3c/pt/translation.py	2008-08-12 12:51:20 UTC (rev 89723)
+++ z3c.pt/trunk/src/z3c/pt/translation.py	2008-08-12 13:17:52 UTC (rev 89724)
@@ -254,7 +254,7 @@
             _.append(clauses.Repeat(variables[0], expression))
 
         # tag tail (deferred)
-        tail = self.tail
+        tail = self.raw_tail
         if tail and not self.metal_fillslot:
             if isinstance(tail, unicode):
                 tail = tail.encode('utf-8')
@@ -289,7 +289,7 @@
                 _.append(tag)
 
         # tag text (if we're not replacing tag body)
-        text = self.text
+        text = self.raw_text
         if text and not dynamic:
             if isinstance(text, unicode):
                 text = text.encode('utf-8')



More information about the Checkins mailing list