[Zope3-checkins] CVS: zopeproducts/xml/dom - README.txt:1.1 __init__.py:1.1 core.py:1.1 expatbuilder.py:1.1 loadsave.py:1.1 saxbuilder.py:1.1 traversal.py:1.1 xmlextended.py:1.1
Philipp von Weitershausen
philikon@philikon.de
Fri, 20 Jun 2003 11:11:39 -0400
Update of /cvs-repository/zopeproducts/xml/dom
In directory cvs.zope.org:/tmp/cvs-serv15767/xml/dom
Added Files:
README.txt __init__.py core.py expatbuilder.py loadsave.py
saxbuilder.py traversal.py xmlextended.py
Log Message:
Moved the xml_examples, xslt, xslt_examples and xmldom products to one
xml product.
=== Added File zopeproducts/xml/dom/README.txt ===
Documentation
=============
xmldom implements an XML DOM; the DOM standard is the right
place to find documentation:
http://www.w3.org/DOM/
xmldom implements DOM level 2 (in particular Core, and buggily parts
of Traversal), with some experimental level 3 extensions.
http://www.w3.org/DOM/DOMTR#dom2
http://www.w3.org/DOM/DOMTR#dom3
Example
=======
from zopeproducts.xml.dom import expatbuilder
from StringIO import StringIO
xml = '<doc>Hello world!</doc>'
f = StringIO(xml)
dom = expatbuilder.parse(f)
Background
==========
xmldom is derived from the Zope 2 ParsedXML product. It only contains
that product's DOM implementation and tests, not any of the Zope
integration (which would be very different in Zope 3 anyway). It has
been changed to work with Zope 3:
* now uses ContextWrappers instead of Zope 2 explicit acquisition.
* uses Python 2.2 properties instead of custom __getattr__ based
access handlers. All kinds of __dict__ workarounds have also been
removed.
* module names are in lower case.
* relicensed to ZPL 2.0.
It passes the (extensive) DOM unit tests which are also included.
Problems
========
The code is extremely grotty in many places. This is in part due to
the requirements of XML conformance and the DOM standard in particular
(which doesn't excel in clean design). It does pass the tests,
however.
Traversal implementation seems to be buggy.
It still exhibits the same fundamental problem as ParsedXML does
concerning ContextWrappers, for more information see
tests/test_contextwrapperpain.py.
=== Added File zopeproducts/xml/dom/__init__.py ===
# this is a package
=== Added File zopeproducts/xml/dom/core.py === (1913/2313 lines abridged)
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
XML DOM core implementation
See the W3C DOM specification for more information: http://www.w3.org/DOM/
$Id: core.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
__metaclass__ = type
import string
import xml.dom
from zope.interface import implements
from zope.context import ContextProperty, ContainmentIterator
from zope.context import ContextAwareDescriptors
from zope.context import getWrapperContext, getWrapperContainer, getbaseobject
from zope.context.wrapper import setcontext
from zope.app.context import ContextWrapper
from zopeproducts.xml.interfaces.dom.core import IDOMImplementation, \
INode, IDocument, IDocumentFragment, INodeList, INamedNodeMap, \
ICharacterData, IAttr, IElement, IText, IComment
from zopeproducts.xml.interfaces.dom.traversal import IDocumentTraversal
# legal qualified name pattern, from PyXML xml/dom/Document.py
# see http://www.w3.org/TR/REC-xml-names/#NT-QName
# we don't enforce namespace usage if using namespaces, which basically
# means that we don't disallow a leading ':'
# XXX there's more to the world than ascii a-z
# FIXME: should allow combining characters: fix when Python gets Unicode
import re
_ok_qualified_name = re.compile('[a-zA-Z_:][\w\.\-_:]*\Z').match
_TupleType = type(())
_StringTypes = (type(''), type(unicode('')))
# http://www.w3.org/TR/1999/REC-xml-names-19990114/#ns-qualnames
def _check_qualified_name(name, uri='ok'):
    """Validate `name` as a (possibly prefixed) qualified name.

    Returns True when the name is acceptable.  Raises
    xml.dom.InvalidCharacterErr when the name does not match the legal
    name pattern, and xml.dom.NamespaceErr when a name containing ':' is
    malformed or is not accompanied by a namespace URI.
    """
    if _ok_qualified_name(name) is None:
        raise xml.dom.InvalidCharacterErr()
    if ":" not in name:
        return True
    pieces = name.split(':')
    # exactly one colon is allowed, with text on both sides of it
    if len(pieces) != 2 or not (pieces[0] and pieces[1]):
        raise xml.dom.NamespaceErr("malformed qualified name")
    if not uri:
        raise xml.dom.NamespaceErr("no namespace URI for prefix")
    return True
# Common namespaces:
XML_NS = "http://www.w3.org/XML/1998/namespace"
XMLNS_NS = "http://www.w3.org/2000/xmlns/"
def _check_reserved_prefixes(prefix, namespaceURI):
    """Enforce the namespace bindings of the reserved prefixes.

    The 'xml' prefix may only be used with XML_NS and the 'xmlns' prefix
    only with XMLNS_NS; any other pairing raises xml.dom.NamespaceErr.
    Other prefixes are not checked.
    """
    reserved = (
        ("xml", XML_NS, "illegal use of the 'xml' prefix"),
        ("xmlns", XMLNS_NS, "illegal use of the 'xmlns' prefix"),
    )
    for reserved_prefix, required_uri, message in reserved:
        if prefix == reserved_prefix and namespaceURI != required_uri:
            raise xml.dom.NamespaceErr(message)
# Hand `node` and `parent` to zope.context.wrapper.setcontext; presumably
# this makes `parent` the wrapper context of `node` (confirm against
# zope.context).  Callers in this module pass None to detach a node.
def _reparent(node, parent):
setcontext(node, parent)
# These are indexes into the list that is used to represent an Attr node.
_ATTR_NS = 0
_ATTR_NAME = 1
_ATTR_LOCALNAME = 2
_ATTR_PREFIX = 3
_ATTR_VALUE = 4
_ATTR_SPECIFIED = 5
# These are used for schema-derived information, and are not used for
# specified attributes.
_ATTR_TYPE = 6
_ATTR_REQUIRED = 7
_SUPPORTED_FEATURES = (
("core", None),
("xml", None),
#("traversal", None),
#("load", None),
# According to DOM Erratum Core-14, the empty string should be
# accepted as equivalent to null for hasFeature().
("core", ""),
("xml", ""),
#("traversal", ""),
#("load", ""),
("core", "1.0"),
("xml", "1.0"),
("core", "2.0"),
("xml", "2.0"),
#("traversal", "2.0"),
#("load", "3.0"),
)
# DOMProperty could just be a property, but if we make it a ContextProperty
# then we avoid a level of indirection when used in a ContextAware class.
class DOMProperty(ContextProperty):
    """Property descriptor that applies the DOM read-only rules on assignment.

    A property built without a setter is flagged as a read-only attribute.
    Assignment is also refused when the instance carries a true
    `_readonly` attribute.  Both cases raise
    xml.dom.NoModificationAllowedErr.
    """

    _readonly = False

    def __init__(self, getter, setter=None):
        if setter is None:
            # no setter supplied: remember that the attribute is read-only
            self._readonly = True
            super(DOMProperty, self).__init__(getter)
        else:
            super(DOMProperty, self).__init__(getter, setter)

    def __set__(self, inst, value):
        # a read-only node rejects every assignment, whatever the attribute
        if getattr(inst, '_readonly', False):
            raise xml.dom.NoModificationAllowedErr(
                "cannot set attribute on read-only node")
        # XXX would like to say which attribute is readonly
        if self._readonly:
            raise xml.dom.NoModificationAllowedErr(
                "read-only attribute")
        super(DOMProperty, self).__set__(inst, value)
# Empty placeholder class.  DOMImplementation.createDocumentType passes an
# instance as the initial context when wrapping a new DocumentType node.
class _Dummy:
pass
class DOMImplementation:
    """Factory for documents and document types; reports supported features."""
    implements(IDOMImplementation)
    ContextAwareDescriptors()

    def hasFeature(self, feature, version):
        """Return true if the (feature, version) pair is supported.

        The feature name is compared case-insensitively; the version must
        equal one of the versions listed in _SUPPORTED_FEATURES.
        """
        return (feature.lower(), version) in _SUPPORTED_FEATURES

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a context-wrapped DocumentType node with no parent.

        `qualifiedName` is validated first, so the exceptions of
        _check_qualified_name can propagate from here.
        """
        _check_qualified_name(qualifiedName)
        # imported here rather than at module level, as in the original
        import xmlextended
        doctype = xmlextended.DocumentType(qualifiedName, publicId, systemId)
        # wrap with a throwaway context, then detach
        doctype = ContextWrapper(doctype, _Dummy())
        _reparent(doctype, None)
        return doctype

    def createDocument(self, namespaceURI, qualifiedName, docType=None):
        """Create a Document for the given root element name and doctype."""
        return Document(docType, namespaceURI, qualifiedName)

    # DOM Level 3 Core (working draft, 5 Jun 2001)
    def getAs(self, feature):
        """Return this implementation itself, whatever `feature` is."""
        return self

    # DOM Level 3 Load/Save (working draft, 9 Feb 2001)
    def createDOMBuilder(self):
        """Return a new loadsave.DOMBuilder instance."""
        # The module name must match the one used on the next line; the
        # sibling module in this package is 'loadsave'.
        import loadsave
        return loadsave.DOMBuilder()
theDOMImplementation = DOMImplementation()
class Node:
implements(INode)
ContextAwareDescriptors()
ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
TEXT_NODE = 3
CDATA_SECTION_NODE = 4
ENTITY_REFERENCE_NODE = 5
ENTITY_NODE = 6
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
[-=- -=- -=- 1913 lines omitted -=- -=- -=-]
#
# The workhorse of item removal; this removes whatever
# item the 'matcher' test determines matches. 'name' is
# passed to the matcher but is not used otherwise.
#
if self._parent._readonly or self._readonly:
raise xml.dom.NoModificationAllowedErr()
pList = self._getParentList()
for i in range(len(pList)):
item = pList[i]
if matcher(item, name):
break
else:
raise xml.dom.NotFoundErr()
self._delFromParentList(pList, i)
node = self._item_helper(item)
node._set_owner_element(None)
return node
def __delitem__(self, name):
    """Remove the entry whose key equals `name`.

    Raises xml.dom.NoModificationAllowedErr when this map or its parent
    is read-only, and KeyError when no entry has that key.
    """
    if self._parent._readonly or self._readonly:
        raise xml.dom.NoModificationAllowedErr()
    entries = self._getParentList()
    index = 0
    for entry in entries:
        if self._key_helper(entry) == name:
            # safe to delete while iterating: we return immediately
            self._delFromParentList(entries, index)
            return
        index = index + 1
    raise KeyError(name)
def has_key(self, name):
    """Return True if an entry of the parent list has key `name`."""
    found = False
    for entry in self._getParentList():
        if self._key_helper(entry) == name:
            found = True
            break
    return found
def items(self):
    """Return a list of (key, node) pairs, one per parent-list entry."""
    return [(self._key_helper(source), self._item_helper(source))
            for source in self._getParentList()]
def keys(self):
    """Return the list of keys, in parent-list order."""
    return [self._key_helper(source) for source in self._getParentList()]
def values(self):
    """Return the list of nodes, in parent-list order."""
    return [self._item_helper(source) for source in self._getParentList()]
class AttributeMap(MapFromParent):
    """NamedNodeMap that works on the attribute structure.

    This doesn't do anything about the namespace declarations.
    """
    ContextAwareDescriptors()

    _parentListName = '_attributes'

    def __init__(self, parent):
        self._attr_info = parent._attr_info
        self._parent = parent
        self._nameMatcher = _attr_item_match_name
        self._nsMatcher = _attr_item_match_ns

    def _present_keys(self):
        "collect the identifying key of every item in the parent's list"
        # namespaced items are keyed by (namespaceURI, localName), all
        # others by their name
        present = {}
        for entry in self._parent._attributes:
            if entry[_ATTR_NS]:
                present[entry[_ATTR_NS], entry[_ATTR_LOCALNAME]] = True
            else:
                present[entry[_ATTR_NAME]] = True
        return present

    def _get_length(self):
        "count distinct attributes over the parent's list and _attr_info"
        present = self._present_keys()
        for entry in self._attr_info:
            if entry[_ATTR_NS]:
                present[entry[_ATTR_NS], entry[_ATTR_LOCALNAME]] = True
            else:
                present[entry[_ATTR_NAME]] = True
        return len(present)

    __len__ = _get_length
    length = DOMProperty(_get_length)

    def item(self, i):
        "return the i-th attribute node, consulting _attr_info if needed"
        node = MapFromParent.item(self, i)
        if node is not None or not self._attr_info:
            return node
        present = self._present_keys()
        # entries from _attr_info are numbered after the present ones
        position = len(present)
        for info in self._attr_info:
            if info[_ATTR_NAME] in present:
                continue
            if position == i:
                # copy the declaration into the parent's own list before
                # building a node around it
                entry = list(info)
                if self._parent._attributes:
                    self._parent._attributes.append(entry)
                else:
                    self._parent._attributes = [entry]
                node = Attr(
                    entry, self._parent.ownerDocument, self._parent)
                node = ContextWrapper(node, self._parent)
                break
            position = position + 1
        return node

    def _item_helper(self, itemSource):
        "used by item; create an Attribute from the item and return it"
        attr = Attr(itemSource, self._parent.ownerDocument, self._parent)
        return ContextWrapper(attr, self._parent)

    def _set_named_item(self, nameinfo, matcher, node):
        "utility function for setNamedItem"
        if self._parent._readonly or self._readonly:
            raise xml.dom.NoModificationAllowedErr()
        if node.nodeType != Node.ATTRIBUTE_NODE:
            raise xml.dom.HierarchyRequestErr()
        if not self._parent.ownerDocument.isSameNode(node.ownerDocument):
            raise xml.dom.WrongDocumentErr()
        if node.ownerElement:
            if node.ownerElement.isSameNode(self._parent):
                # This is already our node; no extra work needed, and no
                # change to the storage object.
                return node
            raise xml.dom.InuseAttributeErr()
        attributes = self._getParentList()
        if not attributes:
            self._parent._attributes = [node._item]
            node._set_owner_element(self._parent)
            return node
        replaced = None
        index = 0
        for entry in attributes:
            if matcher(entry, nameinfo):
                replaced = entry
                attributes[index] = node._item
                break
            index = index + 1
        if replaced is None:
            self._addToParentList(attributes, node)
        node._set_owner_element(self._parent)
        return replaced

    def _delFromParentList(self, attrs, i):
        "workhorse for __delitem__; remove ith item from attrs"
        del attrs[i] #XXX ownerElement needs to be updated in other refs
        self._parent._changed()

    def _addToParentList(self, attrs, node):
        "append node's item to the parent's list, creating it if empty"
        owner = self._parent
        if owner._attributes:
            owner._attributes.append(node._item)
        else:
            owner._attributes = [node._item]
        owner._changed()

    def _key_helper(self, itemSource):
        "given an item source, return an appropriate key for our mapping"
        return itemSource[_ATTR_NAME]
# Utility functions for Attrs, used by more than the Attr class.
def _attr_item_match_name(item, name,
                          _ATTR_NAME=_ATTR_NAME):
    """Return true if the name stored in the attribute item equals `name`.

    The index constant is bound as a default argument, so the lookup
    inside the function is a local one.
    """
    stored = item[_ATTR_NAME]
    return stored == name
def _attr_item_match_ns(item, nameinfo,
                        _ATTR_NS=_ATTR_NS, _ATTR_LOCALNAME=_ATTR_LOCALNAME):
    """Return true if the attribute item matches a namespaced name.

    `nameinfo` is a (namespaceURI, localName) pair; both parts must equal
    the values stored in `item`.  The pair is unpacked in the body rather
    than in the signature, because tuple parameters were removed from the
    language by PEP 3113.  Callers pass it positionally either way.
    """
    namespaceURI, localName = nameinfo
    return (item[_ATTR_LOCALNAME] == localName
            and item[_ATTR_NS] == namespaceURI)
def _attr_get_value(nodes):
    """Return the concatenated nodeValue of `nodes`, skipping false values."""
    collected = [node.nodeValue for node in nodes]
    return ''.join([value for value in collected if value])
def _attr_set_value(item, value):
    """Replace the children of an attribute item with one new Text node.

    The existing value list is emptied and refilled in place rather than
    replaced, so every holder of that list sees the new value.
    """
    replacement = Text(value)
    del replacement._in_tree
    children = item[_ATTR_VALUE]
    while children:
        children.pop()
    children.append(replacement)
=== Added File zopeproducts/xml/dom/expatbuilder.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
XML DOM implementation
Create a DOM tree from a string or file using the Expat parser.
$Id: expatbuilder.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
# Warning!
#
# This module is tightly bound to the implementation details of the
# Parsed XML DOM and can't be used with other DOM implementations. This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance. The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
# - .character_data_handler() has an extra case in which continuing
# data is appended to an existing Text node; this can be a
# substantial speedup since Expat seems to break data at every
# newline.
#
# - Determining that a node exists is done using an identity comparison
# with None rather than a truth test; this avoids searching for and
# calling any methods on the node object if it exists. (A rather
# nice speedup is achieved this way as well!)
import string
import core
import xmlextended
from xml.parsers import expat
class Options:
    """Features object that has variables set for each DOMBuilder feature.

    The DOMBuilder class uses an instance of this class to pass settings to
    the ExpatBuilder class.
    """

    # Note that the DOMBuilder class in LoadSave constrains which of these
    # values can be set using the DOM Level 3 LoadSave feature.

    # flags, 1 = on / 0 = off
    namespaces = 1
    namespace_declarations = 1
    validation = 0
    external_general_entities = 1
    external_parameter_entities = 1
    validate_if_cm = 0
    create_entity_ref_nodes = 1
    entity_nodes = 1
    white_space_in_element_content = 1
    cdata_nodes = 1
    comments = 1
    charset_overrides_xml_encoding = 1

    # optional collaborators, unset by default
    errorHandler = None
    filter = None
class ExpatBuilder:
"""Document builder that uses Expat to build a ParsedXML.DOM document
instance."""
def __init__(self, options=None):
    """Create a builder using `options`, or a default Options if None."""
    if options is not None:
        self._options = options
    else:
        self._options = Options()
    self._parser = None
    self.reset()
# _intern(s) returns the first string equal to `s` that this builder has
# seen, so equal names share one object.  dict.setdefault is used when the
# interpreter provides it; otherwise an explicit lookup does the same job.
if hasattr({}, "setdefault"):
    def _intern(self, s):
        return self._interns.setdefault(s, s)
else:
    def _intern(self, s):
        try:
            return self._interns[s]
        except KeyError:
            self._interns[s] = s
            return s
def createParser(self):
    """Return a fresh Expat parser created with default settings."""
    parser = expat.ParserCreate()
    return parser
def getParser(self):
    """Return the current parser, creating and installing one if needed."""
    if self._parser:
        return self._parser
    self._parser = self.createParser()
    self.install(self._parser)
    return self._parser
def reset(self):
    """Free all data structures used during DOM construction."""
    # document under construction
    self.document = None
    # CDATA section flag
    self._cdata = 0
    # XML declaration values; -1 means no standalone value was seen
    self._standalone = -1
    self._version = None
    self._encoding = None
    # doctype information gathered before the document exists
    self._doctype_args = None
    self._entities = []
    self._notations = []
    self._pre_doc_events = []
    # DTD declarations, keyed by element name
    self._attr_info = {}
    self._elem_info = {}
    # table used by _intern
    self._interns = {}
def install(self, parser):
    """Install the callbacks needed to build the DOM into the parser."""
    # This creates circular references!
    settings = (
        ("StartDoctypeDeclHandler", self.start_doctype_decl_handler),
        ("StartElementHandler", self.start_element_handler),
        ("EndElementHandler", self.end_element_handler),
        ("ProcessingInstructionHandler", self.pi_handler),
        ("CharacterDataHandler", self.character_data_handler),
        ("EntityDeclHandler", self.entity_decl_handler),
        ("NotationDeclHandler", self.notation_decl_handler),
        ("CommentHandler", self.comment_handler),
        ("StartCdataSectionHandler", self.start_cdata_section_handler),
        ("EndCdataSectionHandler", self.end_cdata_section_handler),
        ("ExternalEntityRefHandler", self.external_entity_ref_handler),
        # attributes arrive as a flat [name, value, ...] list
        ("ordered_attributes", 1),
        ("specified_attributes", 1),
        ("XmlDeclHandler", self.xml_decl_handler),
        ("ElementDeclHandler", self.element_decl_handler),
        ("AttlistDeclHandler", self.attlist_decl_handler),
    )
    for attribute, value in settings:
        setattr(parser, attribute, value)
def parseFile(self, file):
"""Parse a document from a file object, returning the document
node."""
# The file is read and fed to Expat in 16 KB chunks.  Line endings are
# normalized per chunk, so a "\r\n" pair split across two chunks needs
# special care (see strip_newline below).
parser = self.getParser()
first_buffer = 1
strip_newline = 0
while 1:
buffer = file.read(16*1024)
if not buffer:
break
# strip_newline is set when the previous chunk ended in "\r"; a leading
# "\n" here is then the second half of a split "\r\n" and is dropped,
# because the "\r" has already been turned into "\n".
if strip_newline:
if buffer[0] == "\n":
buffer = buffer[1:]
strip_newline = 0
# buffer may have become empty after dropping the "\n" above
if buffer and buffer[-1] == "\r":
strip_newline = 1
buffer = _normalize_lines(buffer)
parser.Parse(buffer, 0)
# Once a document with a doctype exists, the chunk just parsed is handed
# to _setup_subset to look for an internal subset.
# NOTE(review): the original indentation is lost here; presumably
# first_buffer is cleared inside the outer 'if' -- confirm.
if first_buffer and self.document:
if self.document.doctype:
self._setup_subset(buffer)
first_buffer = 0
# an empty final chunk tells Expat that the input is complete
parser.Parse("", 1)
doc = self.document
# drop all construction state, including the parser, before returning
self.reset()
self._parser = None
return doc
def parseString(self, string):
    """Parse a document from a string, returning the document node."""
    text = _normalize_lines(string)
    expat_parser = self.getParser()
    expat_parser.Parse(text, 1)
    self._setup_subset(text)
    result = self.document
    # drop all construction state, including the parser
    self.reset()
    self._parser = None
    return result
def _setup_subset(self, buffer):
    """Load the internal subset if there might be one."""
    if not self.document.doctype:
        return
    extractor = InternalSubsetExtractor()
    extractor.parseString(buffer)
    subset = extractor.getSubset()
    if subset is None:
        return
    self.document.doctype._internalSubset = subset
def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                               has_internal_subset):
    """Record the doctype declaration for later document creation.

    A ("doctype",) marker is queued with the other pre-document events,
    and the arguments are stored in (name, publicId, systemId) order.
    """
    self._pre_doc_events.append(("doctype",))
    name = self._intern(doctypeName)
    self._doctype_args = (name, publicId, systemId)
def pi_handler(self, target, data):
    """Handle a processing instruction.

    Before the document exists the event is queued; afterwards a node is
    created and appended to the current node.
    """
    target = self._intern(target)
    if self.document is None:
        self._pre_doc_events.append(("pi", target, data))
        return
    pi = self.document.createProcessingInstruction(target, data)
    self.curNode.appendChild(pi)
def character_data_handler(self, data):
    """Add character data to the tree.

    Data continuing the previous chunk is appended to the existing CDATA
    section or Text node instead of creating a new node.
    """
    if self._cdata:
        if self._cdata_continue:
            last = self.curNode._children[-1]
            if last.nodeType == core.Node.CDATA_SECTION_NODE:
                # extend the CDATA section started by an earlier call
                last._data = last._nodeValue = last._data + data
                return
        node = self.document.createCDATASection(data)
        self._cdata_continue = 1
    else:
        siblings = self.curNode._children
        if siblings and siblings[-1].nodeType == core.Node.TEXT_NODE:
            # extend the preceding Text node
            last = siblings[-1]
            last._data = last._nodeValue = last._data + data
            return
        node = self.document.createTextNode(data)
    self.curNode.appendChild(node)
def entity_decl_handler(self, entityName, is_parameter_entity, value,
                        base, systemId, publicId, notationName):
    """Create an Entity node for a general entity declaration.

    Parameter entities are ignored (the DOM does not represent them), and
    nothing happens when the entity_nodes option is off.
    """
    if is_parameter_entity or not self._options.entity_nodes:
        return
    entityName = self._intern(entityName)
    notationName = self._intern(notationName)
    entity = xmlextended.Entity(entityName, publicId, systemId, notationName)
    if value is not None:
        # internal entity
        text = core.Text(value)
        # must still get to parent, even if entity isn't _in_tree
        text._in_tree = 1
        text._readonly = 1
        entity._children = [text]
    self._entities.append(entity)
def notation_decl_handler(self, notationName, base, systemId, publicId):
    """Create a Notation node and store it until the doctype is built."""
    name = self._intern(notationName)
    self._notations.append(
        xmlextended.Notation(name, publicId, systemId))
def comment_handler(self, data):
    """Handle a comment, unless the comments option is off.

    Before the document exists the event is queued; afterwards a Comment
    node is appended to the current node.
    """
    if not self._options.comments:
        return
    if self.document is None:
        self._pre_doc_events.append(("comment", data))
        return
    comment = self.document.createComment(data)
    self.curNode.appendChild(comment)
def start_cdata_section_handler(self):
    """Note that a CDATA section has begun, if cdata_nodes is enabled."""
    if not self._options.cdata_nodes:
        return
    self._cdata = 1
    self._cdata_continue = 0
def end_cdata_section_handler(self):
    """Clear both CDATA flags at the end of a CDATA section."""
    self._cdata = self._cdata_continue = 0
def external_entity_ref_handler(self, context, base, systemId, publicId):
    """Return 1 without loading anything for the referenced entity."""
    handled = 1
    return handled
# Expat start-tag callback.  The first start tag creates the Document
# (with its doctype and the queued pre-document events); every later one
# creates an Element under the current node.  `attributes` is a flat
# [name, value, name, value, ...] list, since install() sets
# ordered_attributes on the parser.
# NOTE(review): the original indentation is lost; the comments below
# describe the presumed nesting -- confirm against the repository.
def start_element_handler(self, name, attributes):
name = self._intern(name)
if self.document is None:
doctype = self._create_doctype()
doc = core.theDOMImplementation.createDocument(
None, name, doctype)
# _standalone stays at -1 when no standalone value was recorded
if self._standalone >= 0:
doc.standalone = self._standalone
doc.encoding = self._encoding
doc.version = self._version
# the document shares the builder's DTD declaration tables
doc._elem_info = self._elem_info
doc._attr_info = self._attr_info
self.document = doc
self._include_early_events()
node = doc.documentElement
# chicken & egg: if this isn't inserted here, the document
# element doesn't get the information about the defined
# attributes for its element type
if self._attr_info.has_key(name):
node._attr_info = self._attr_info[name]
else:
node = self.document.createElement(name)
self.curNode.appendChild(node)
self.curNode = node
if attributes:
L = []
# one item per attribute, laid out as [namespaceURI, qualifiedName,
# localName, prefix, value, specified]; no namespace data here
for i in range(0, len(attributes), 2):
L.append([None, self._intern(attributes[i]),
None, None, attributes[i+1], 1])
node._attributes = L
def end_element_handler(self, name):
    """Close the current element and step back up to its parent.

    Whitespace-only text children are then processed, and the element is
    reported to the filter from the options, if one is set.
    """
    finished = self.curNode
    assert finished.tagName == name, "element stack messed up!"
    self.curNode = self.curNode.parentNode
    self._handle_white_text_nodes(finished)
    if self._options.filter:
        self._options.filter.endElement(finished)
def _handle_white_text_nodes(self, node):
    """Mark or remove whitespace-only text children of `node`.

    Nothing is done without an element declaration for the node, or when
    its content type is ANY or MIXED.
    """
    info = self._elem_info.get(node.tagName)
    if not info:
        return
    if info[0] in (expat.model.XML_CTYPE_ANY,
                   expat.model.XML_CTYPE_MIXED):
        return
    #
    # We have element type information; look for text nodes which
    # contain only whitespace.
    #
    blanks = [child for child in node.childNodes
              if child.nodeType == core.Node.TEXT_NODE
              and not child.data.strip()]
    #
    # Depending on the options, either mark the nodes as ignorable
    # whitespace or remove them from the tree.
    #
    for child in blanks:
        if self._options.white_space_in_element_content:
            child._isWhitespaceInElementContent = 1
        else:
            node.removeChild(child)
def element_decl_handler(self, name, model):
    """Store the content model of a declared element under its name."""
    key = self._intern(name)
    self._elem_info[key] = model
def attlist_decl_handler(self, elem, name, type, default, required):
    """Record a declared attribute for element type `elem`.

    The stored item is [namespaceURI, name, localName, prefix, value,
    specified, type, required]; declared attributes are entered with
    specified=0 and no namespace information.
    """
    elem = self._intern(elem)
    name = self._intern(name)
    type = self._intern(type)
    declared = self._attr_info.get(elem)
    if declared is None:
        declared = self._attr_info[elem] = []
    declared.append([None, name, None, None, default, 0, type, required])
def xml_decl_handler(self, version, encoding, standalone):
    """Remember the values from the XML declaration for the document."""
    self._version, self._encoding, self._standalone = (
        version, encoding, standalone)
def _create_doctype(self):
    """Build the DocumentType node, or return None without a doctype.

    The collected entities and notations are copied into the new node,
    and the builder then shares the node's own lists.
    """
    args = self._doctype_args
    if not args:
        return
    doctype = core.theDOMImplementation.createDocumentType(*args)
    doctype._entities[:] = self._entities
    self._entities = doctype._entities
    doctype._notations[:] = self._notations
    self._notations = doctype._notations
    return doctype
def _include_early_events(self):
    """Insert the comments and PIs seen before the document existed.

    Events queued before the doctype marker go before the doctype node,
    later ones before the document element.  Raises RuntimeError for an
    unknown event type.
    """
    doc = self.document
    if self._doctype_args:
        anchor = doc.doctype
    else:
        anchor = doc.documentElement
    for event in self._pre_doc_events:
        kind = event[0]
        if kind == "doctype":
            # marker; switch to before docelem
            anchor = doc.documentElement
            continue
        if kind == "comment":
            node = doc.createComment(event[1])
        elif kind == "pi":
            node = doc.createProcessingInstruction(event[1], event[2])
        else:
            raise RuntimeError("unexpected early event type: " + repr(kind))
        doc.insertBefore(node, anchor)
def _normalize_lines(s):
    """Return a copy of 's' with line-endings normalized according to
    XML 1.0 section 2.11.

    Every "\\r\\n" pair and every remaining lone "\\r" becomes "\\n".
    String methods are used rather than the deprecated functions of the
    `string` module, which newer interpreters no longer provide.
    """
    # pairs first, so that "\r\n" does not turn into two newlines
    return s.replace("\r\n", "\n").replace("\r", "\n")
# framework document used by the fragment builder.
# Takes a string for the doctype, subset string, and namespace attrs string.
_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
"http://xml.zope.org/entities/fragment-builder/internal"
_FRAGMENT_BUILDER_TEMPLATE = (
'''\
<!DOCTYPE wrapper
%%s [
<!ENTITY fragment-builder-internal
SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
% _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)
class FragmentBuilder(ExpatBuilder):
"""Builder which constructs document fragments given XML source
text and a context node.
The context node is expected to provide information about the
namespace declarations which are in scope at the start of the
fragment.
"""
def __init__(self, context, options=None):
    """Create a fragment builder for the given context node.

    When `context` is a Document it is itself the original document;
    otherwise the original document is the context's ownerDocument.
    """
    if context.nodeType == core.Node.DOCUMENT_NODE:
        owner = context
    else:
        owner = context.ownerDocument
    self.originalDocument = owner
    self.context = context
    ExpatBuilder.__init__(self, options)
def reset(self):
    """Reset the inherited builder state and forget the fragment."""
    ExpatBuilder.reset(self)
    # no fragment has been built yet
    self.fragment = None
def parseFile(self, file):
    """Parse a document fragment from a file object, returning the
    fragment node."""
    # the whole file is read at once and parsed as a string
    source = file.read()
    return self.parseString(source)
def parseString(self, string):
    """Parse a document fragment from a string, returning the
    fragment node."""
    self._source = string
    parser = self.getParser()
    doctype = self.originalDocument.doctype
    ident = ""
    subset = ""
    if doctype:
        subset = doctype.internalSubset or self._getDeclarations()
        if doctype.publicId:
            ident = ('PUBLIC "%s" "%s"'
                     % (doctype.publicId, doctype.systemId))
        elif doctype.systemId:
            ident = 'SYSTEM "%s"' % doctype.systemId
    nsattrs = self._getNSattrs() # get ns decls from node's ancestors
    # The input itself is not parsed here: the wrapper document refers to
    # it through an external entity, which external_entity_ref_handler
    # resolves to self._source.
    wrapper = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
    try:
        parser.Parse(wrapper, 1)
    except:
        # clean up on any failure, then let the error propagate
        self.reset()
        raise
    result = self.fragment
    self.reset()
    ## self._parser = None
    return result
def _getDeclarations(self):
"""Re-create the internal subset from the DocumentType node.
This is only needed if we don't already have the
internalSubset as a string.
"""
# Returns a string of <!NOTATION ...> declarations followed by
# <!ENTITY ...> declarations, or "" when the original document has no
# doctype.
doctype = self.originalDocument.doctype
s = ""
if doctype:
for i in range(doctype.notations.length):
notation = doctype.notations.item(i)
# separate this declaration from the previous one
if s:
s = s + "\n "
s = "%s<!NOTATION %s" % (s, notation.nodeName)
if notation.publicId:
s = '%s PUBLIC "%s"\n "%s">' \
% (s, notation.publicId, notation.systemId)
else:
s = '%s SYSTEM "%s">' % (s, notation.systemId)
for i in range(doctype.entities.length):
entity = doctype.entities.item(i)
if s:
s = s + "\n "
s = "%s<!ENTITY %s" % (s, entity.nodeName)
if entity.publicId:
s = '%s PUBLIC "%s"\n "%s"' \
% (s, entity.publicId, entity.systemId)
elif entity.systemId:
s = '%s SYSTEM "%s"' % (s, entity.systemId)
else:
# no identifiers: the replacement text is the entity's first child
s = '%s "%s"' % (s, entity.firstChild.data)
# NOTE(review): indentation is lost; presumably the NOTATION suffix and
# the closing ">" apply to every entity, not only this branch -- confirm
if entity.notationName:
s = "%s NOTATION %s" % (s, entity.notationName)
s = s + ">"
return s
def _getNSattrs(self):
    """Return the namespace attribute text for the wrapper element.

    This builder contributes none, so the result is the empty string.
    """
    no_attributes = ""
    return no_attributes
def external_entity_ref_handler(self, context, base, systemId, publicId):
    """Parse the fragment source when the internal entity is referenced.

    Any other system identifier is passed on to ExpatBuilder.
    """
    if systemId != _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
        return ExpatBuilder.external_entity_ref_handler(
            self, context, base, systemId, publicId)
    # this entref is the one that we made to put the subtree
    # in; all of our given input is parsed in here.
    saved_document = self.document
    saved_cur_node = self.curNode
    self._save_namespace_decls()
    parser = self._parser.ExternalEntityParserCreate(context)
    self._restore_namespace_decls()
    # put the real document back, parse into the fragment to return
    self.document = self.originalDocument
    self.fragment = self.document.createDocumentFragment()
    self.curNode = self.fragment
    try:
        parser.Parse(self._source, 1)
    finally:
        # restore the wrapper-document state whether or not parsing failed
        self.curNode = saved_cur_node
        self.document = saved_document
        self._source = None
    return -1
def _save_namespace_decls(self):
    """Do nothing; this builder keeps no namespace state to save."""
def _restore_namespace_decls(self):
    """Do nothing; this builder keeps no namespace state to restore."""
class Namespaces:
"""Mix-in class for builders; adds support for namespaces."""
def _initNamespaces(self):
    """Create fresh namespace-tracking tables for this builder."""
    #
    # These first two dictionaries are used to track internal
    # namespace state, and contain all "current" declarations.
    #
    # URI -> [prefix, prefix, prefix...]
    #
    # The last prefix in list is most recently declared; there's
    # no way to be sure we're using the right one if more than one
    # has been defined for a particular URI.
    self._nsmap = {core.XML_NS: ["xml"], core.XMLNS_NS: ["xmlns"]}
    # prefix -> [URI, URI, URI...]
    self._prefixmap = {"xml": [core.XML_NS], "xmlns": [core.XMLNS_NS]}
    #
    # These dictionaries are used to store the namespace
    # declarations made on a single element; they are used to add
    # the attributes of the same name to the DOM structure.  When
    # added to the DOM, they are replaced with new, empty
    # dictionaries on the Builder object.
    #
    self._ns_prefix_uri = {}
    self._ns_uri_prefixes = {}
    # list of (prefix, uri) ns declarations.  Namespace attrs are
    # constructed from this and added to the element's attrs.
    self._ns_ordered_prefixes = []
def createParser(self):
    """Create a new namespace-handling parser."""
    # names are then reported by Expat as "uri localname"
    parser = expat.ParserCreate(namespace_separator=" ")
    return parser
def install(self, parser):
    """Insert the namespace-handlers onto the parser."""
    # the regular handlers first, then the two namespace callbacks
    ExpatBuilder.install(self, parser)
    for attribute, handler in (
            ("StartNamespaceDeclHandler", self.start_namespace_decl_handler),
            ("EndNamespaceDeclHandler", self.end_namespace_decl_handler)):
        setattr(parser, attribute, handler)
def start_namespace_decl_handler(self, prefix, uri):
    "push this namespace declaration on our storage"
    #
    # These are what we use internally:
    #
    prefix = self._intern(prefix)
    uri = self._intern(uri)
    prefixes = self._nsmap.get(uri)
    if prefixes is None:
        prefixes = self._nsmap[uri] = []
    prefixes.append(prefix)
    uris = self._prefixmap.get(prefix)
    if uris is None:
        uris = self._prefixmap[prefix] = []
    uris.append(uri)
    #
    # These are used to provide namespace declaration info to the DOM:
    #
    self._ns_prefix_uri[prefix] = uri
    declared = self._ns_uri_prefixes.get(uri)
    if not declared:
        declared = self._ns_uri_prefixes[uri] = []
    declared.append(prefix)
    self._ns_ordered_prefixes.append((prefix, uri))
def end_namespace_decl_handler(self, prefix):
    "pop the latest namespace declaration."
    # take the newest URI off the prefix, then the newest prefix off it
    latest_uri = self._prefixmap[prefix].pop()
    self._nsmap[latest_uri].pop()
def _save_namespace_decls(self):
    """Save the stored namespace decls and reset the new ones.

    This lets us launch another parser and have its namespace declarations
    not affect future elements.  Must be called outside of any start/end
    namespace_decl_handler calls."""
    for table in ("nsmap", "prefixmap", "ns_prefix_uri",
                  "ns_uri_prefixes", "ns_ordered_prefixes"):
        # e.g. self._oldnsmap = self._nsmap
        setattr(self, "_old" + table, getattr(self, "_" + table))
    self._initNamespaces()
def _restore_namespace_decls(self):
    "Restore the namespace decls from _save_namespace_decls."
    for table in ("nsmap", "prefixmap", "ns_prefix_uri",
                  "ns_uri_prefixes", "ns_ordered_prefixes"):
        # e.g. self._nsmap = self._oldnsmap
        setattr(self, "_" + table, getattr(self, "_old" + table))
def start_element_handler(self, name, attributes):
    """Create the DOM element for a start tag reported by expat.

    name -- element name; a namespaced name arrives as
        "<uri> <localname>" and is split on the single space.
    attributes -- flat sequence [name0, value0, name1, value1, ...];
        it is walked two items at a time.

    The first element seen also creates the document (and becomes its
    document element); later elements are appended to the current node.
    Afterwards the new element becomes the current node, receives its
    attribute records and any namespace declarations collected since
    the previous element, and the per-element declaration bookkeeping
    is reset.
    """
    if ' ' in name:
        uri, localname = name.split(' ')
        localname = self._intern(localname)
        uri = self._intern(uri)
        # The most recently declared prefix for this URI is used.
        prefix = self._intern(self._nsmap[uri][-1])
        if prefix:
            qname = "%s:%s" % (prefix, localname)
        else:
            qname = localname
    else:
        uri = None
        qname = name
        localname = prefix = None
    qname = self._intern(qname)

    if self.document is None:
        doctype = self._create_doctype()
        doc = core.theDOMImplementation.createDocument(
            uri, qname, doctype)
        if self._standalone >= 0:
            doc.standalone = self._standalone
        doc.encoding = self._encoding
        doc.version = self._version
        doc._elem_info = self._elem_info
        doc._attr_info = self._attr_info
        self.document = doc
        self._include_early_events()
        node = doc.documentElement
        # chicken & egg: if this isn't inserted here, the document
        # element doesn't get the information about the defined
        # attributes for its element type
        if qname in self._attr_info:
            node._attr_info = self._attr_info[qname]
    else:
        node = self.document.createElementNS(
            uri, qname, (prefix, localname))
        self.curNode.appendChild(node)
    self.curNode = node

    # Attribute records, one list per attribute:
    # [namespaceURI, qualifiedName, localName, prefix, value, specified]
    records = []

    if self._ns_ordered_prefixes and self._options.namespace_declarations:
        # Expose the namespace declarations themselves as xmlns attributes.
        for decl_prefix, decl_uri in self._ns_ordered_prefixes:
            if decl_prefix:
                attrPrefix = "xmlns"
                tagName = self._intern('%s:%s' % (attrPrefix, decl_prefix))
            else:
                attrPrefix = tagName = "xmlns"
            records.append([core.XMLNS_NS, tagName,
                            self._intern(decl_prefix),
                            attrPrefix, decl_uri, 1])

    if attributes:
        # This uses the most-recently declared prefix, not necessarily
        # the right one.
        for i in range(0, len(attributes), 2):
            aname = attributes[i]
            value = attributes[i + 1]
            if ' ' in aname:
                a_uri, a_localname = aname.split(' ')
                a_localname = self._intern(a_localname)
                a_prefix = self._intern(self._nsmap[a_uri][-1])
                a_uri = self._intern(a_uri)
                if a_prefix:
                    qualifiedname = self._intern(
                        '%s:%s' % (a_prefix, a_localname))
                else:
                    qualifiedname = a_localname
                records.append([a_uri, qualifiedname, a_localname,
                                a_prefix, value, 1])
            else:
                plain = self._intern(aname)
                records.append([None, plain, plain, None, value, 1])

    if records:
        node._attributes = records

    if self._ns_prefix_uri:
        # insert this stuff on the element:
        node._ns_prefix_uri = self._ns_prefix_uri
        node._ns_uri_prefixes = self._ns_uri_prefixes
        # reset for the next:
        self._ns_prefix_uri = {}
        self._ns_uri_prefixes = {}
    self._ns_ordered_prefixes = []
def end_element_handler(self, name):
    """Close the current element and step back up to its parent.

    name -- element name as reported by expat; namespaced names arrive
        as "<uri> <localname>".

    The assertions check that the element being closed is the current
    node.  Whitespace text handling for the closed element is delegated
    to _handle_white_text_nodes().
    """
    closing = self.curNode
    if ' ' in name:
        uri, localname = name.split(' ')
        assert (closing.namespaceURI == uri
                and closing.localName == localname), \
            "element stack messed up! (namespace)"
    else:
        assert closing.nodeName == name, \
            "element stack messed up - bad nodeName"
        assert closing.namespaceURI is None, \
            "element stack messed up - bad namespaceURI"
    self._handle_white_text_nodes(closing)
    # Re-read curNode rather than reusing the local, in case the
    # helper above touched it.
    self.curNode = self.curNode.parentNode
class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder with namespace support mixed in."""

    def reset(self):
        """Reset the base builder, then the namespace bookkeeping."""
        ExpatBuilder.reset(self)
        self._initNamespaces()
class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder with namespace support mixed in."""

    def reset(self):
        """Reset the base fragment builder, then the namespace bookkeeping."""
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return the xmlns attributes in scope at the context node.

        Walks from self.context up through parentNode, collecting each
        prefix the first time it is met (so nearer declarations win).
        The result is a string of " name='uri'" items separated by
        newlines, or "" when nothing is declared.
        """
        seen_prefixes = []
        declarations = []
        node = self.context
        while node:
            if hasattr(node, '_ns_prefix_uri'):
                for prefix, uri in node._ns_prefix_uri.items():
                    # a prefix already seen on a nearer node stays as it is
                    if prefix in seen_prefixes:
                        continue
                    seen_prefixes.append(prefix)
                    if prefix:
                        declname = "xmlns:" + prefix
                    else:
                        declname = "xmlns"
                    declarations.append(" %s='%s'" % (declname, uri))
            node = node.parentNode
        return "\n".join(declarations)
class ParseEscape(Exception):
    """Raised to abandon a parse early; used by InternalSubsetExtractor."""
class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset.

    Only three handlers are installed.  While the doctype declaration is
    open, the parser's default handler collects the raw text chunks it
    is given; parsing is abandoned (via ParseEscape) as soon as the
    declaration ends or the first element starts.
    """

    # Chunks gathered by default_handler().  Stays None when the input
    # has no doctype declaration, so getSubset() can still be called.
    subset = None

    def getSubset(self):
        """Return the internal subset as a string, or None if there is none.

        The subset is whatever was collected between the first "[" chunk
        and the next "]" chunk.
        """
        chunks = self.subset
        if not chunks or "[" not in chunks:
            return None
        inside = chunks[chunks.index("[") + 1:]
        return "".join(inside[:inside.index("]")])

    def parseFile(self, file):
        """Parse file, treating the ParseEscape short-circuit as success."""
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            pass

    def parseString(self, string):
        """Parse string, treating the ParseEscape short-circuit as success."""
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            pass

    def install(self, parser):
        """Install only the handlers needed to find the doctype."""
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
        parser.StartElementHandler = self.start_element_handler

    def start_doctype_decl_handler(self, *args):
        """Start collecting raw text through the default handler."""
        self.subset = []
        self.getParser().DefaultHandler = self.default_handler

    def end_doctype_decl_handler(self):
        """Stop collecting and abandon the rest of the parse."""
        self.getParser().DefaultHandler = None
        raise ParseEscape()

    def start_element_handler(self, name, attrs):
        """Abandon the parse: content has started, so no doctype follows."""
        raise ParseEscape()

    def default_handler(self, s):
        """Keep one raw chunk of the doctype declaration."""
        self.subset.append(s)
def parse(file, namespaces=1):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name or an open file object.  A file
    opened here from a name is always closed again, even when parsing
    fails; a file object passed in is left open for the caller.

    'namespaces' selects the namespace-aware builder when true.
    """
    if namespaces:
        builder = ExpatBuilderNS()
    else:
        builder = ExpatBuilder()

    if isinstance(file, type('')):
        fp = open(file, 'rb')
        try:
            result = builder.parseFile(fp)
        finally:
            fp.close()
    else:
        result = builder.parseFile(file)
    return result
def parseFragment(file, context, namespaces=1):
    """Parse a fragment of a document, given the context from which it was
    originally extracted.  context should be the parent of the node(s) which
    are in the fragment.

    'file' may be either a file name or an open file object.  A file
    opened here from a name is always closed again, even when parsing
    fails; a file object passed in is left open for the caller.

    'namespaces' selects the namespace-aware builder when true.
    """
    if namespaces:
        builder = FragmentBuilderNS(context)
    else:
        builder = FragmentBuilder(context)

    if isinstance(file, type('')):
        fp = open(file, 'rb')
        try:
            result = builder.parseFile(fp)
        finally:
            fp.close()
    else:
        result = builder.parseFile(file)
    return result
def makeBuilder(options):
    """Return a builder for options: namespace-aware if options.namespaces."""
    if options.namespaces:
        builder_class = ExpatBuilderNS
    else:
        builder_class = ExpatBuilder
    return builder_class(options)
=== Added File zopeproducts/xml/dom/loadsave.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
XML DOM Implementation
Implementation of the DOM Level 3 'Load' feature.
$Id: loadsave.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
import core
import expatbuilder
import copy
import string
import xml.dom
# Names exported by "from loadsave import *".
# NOTE(review): DOMBuilderFilter is defined in this module but is not
# listed here - confirm whether that is intentional.
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
class DOMBuilder(object):
    """Configurable front end for building DOM documents.

    Feature names passed to the *Feature() methods are normalised with
    _name_xform() and translated through the _settings table into
    attribute assignments on the builder's options object.

    This is a new-style class so that the property setters below are
    actually invoked on assignment.
    """

    _entityResolver = None
    _errorHandler = None
    _filter = None

    def __init__(self):
        self._options = expatbuilder.Options()

    def _get_entityResolver(self):
        return self._entityResolver

    def _set_entityResolver(self, entityResolver):
        self._entityResolver = entityResolver

    entityResolver = property(_get_entityResolver, _set_entityResolver)

    def _get_errorHandler(self):
        return self._errorHandler

    def _set_errorHandler(self, errorHandler):
        self._errorHandler = errorHandler

    errorHandler = property(_get_errorHandler, _set_errorHandler)

    def _get_filter(self):
        return self._filter

    def _set_filter(self, filter):
        self._filter = filter

    filter = property(_get_filter, _set_filter)

    def _settings_key(self, name, state):
        """Return the _settings key for a feature name and a truth value."""
        if state:
            flag = 1
        else:
            flag = 0
        return (_name_xform(name), flag)

    def setFeature(self, name, state):
        """Switch the named feature on (true state) or off (false state).

        Raises xml.dom.NotFoundErr for an unknown feature name and
        xml.dom.NotSupportedErr when the feature is known but cannot be
        put into the requested state.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        try:
            settings = self._settings[self._settings_key(name, state)]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: " + repr(name))
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if the options object has an attribute for name."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        return self._settings_key(name, state) in self._settings

    # (feature, state) -> list of (option attribute, value) assignments.
    # A missing (feature, state) pair means that state cannot be set.
    _settings = {
        ("namespaces", 0): [("namespaces", 0)],
        ("namespaces", 1): [("namespaces", 1)],
        ("namespace_declarations", 0): [("namespace_declarations", 0)],
        ("namespace_declarations", 1): [("namespace_declarations", 1)],
        ("validation", 0): [("validation", 0)],
        ("external_general_entities", 0): [("external_general_entities", 0)],
        ("external_general_entities", 1): [("external_general_entities", 1)],
        ("external_parameter_entities", 0): [("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [("external_parameter_entities", 1)],
        ("validate_if_cm", 0): [("validate_if_cm", 0)],
        ("create_entity_ref_nodes", 0): [("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [("create_entity_ref_nodes", 1)],
        ("entity_nodes", 0): [("create_entity_ref_nodes", 0),
                              ("entity_nodes", 0)],
        ("entity_nodes", 1): [("entity_nodes", 1)],
        ("white_space_in_element_content", 0):
            [("white_space_in_element_content", 0)],
        ("white_space_in_element_content", 1):
            [("white_space_in_element_content", 1)],
        ("cdata_nodes", 0): [("cdata_nodes", 0)],
        ("cdata_nodes", 1): [("cdata_nodes", 1)],
        ("comments", 0): [("comments", 0)],
        ("comments", 1): [("comments", 1)],
        ("charset_overrides_xml_encoding", 0):
            [("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1):
            [("charset_overrides_xml_encoding", 1)],
    }

    def getFeature(self, name):
        """Return the current value of the named feature.

        Raises xml.dom.NotFoundErr for an unknown feature name.
        """
        try:
            return getattr(self._options, _name_xform(name))
        except AttributeError:
            raise xml.dom.NotFoundErr()

    def parseURI(self, uri):
        """Resolve uri to an input source and parse it.

        The configured entityResolver is used when there is one,
        otherwise a fresh DOMEntityResolver.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parseDOMInputSource(input)

    def parseDOMInputSource(self, input):
        """Parse the document described by input and return the result.

        A copy of the options, extended with the current filter and
        error handler, is handed to the expat builder.  When the input
        has no byte stream but does have a system id, the stream is
        opened from that id.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        if fp is None and input.systemId:
            import urllib
            fp = urllib.urlopen(input.systemId)
        builder = expatbuilder.makeBuilder(options)
        return builder.parseFile(fp)
class DOMEntityResolver(core.DOMImplementation):
    """Default resolver turning a public/system id into a DOMInputSource."""

    def resolveEntity(self, publicId, systemId):
        """Return a DOMInputSource for the given identifiers.

        When systemId is given it is opened with urllib and the
        resulting stream is stored as the source's byteStream.
        """
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        if systemId:
            import urllib
            # The stream belongs on the returned source, not on the
            # resolver itself.
            source.byteStream = urllib.urlopen(systemId)
            # Should parse out the content-type: header to
            # get charset information so that we can set the
            # encoding attribute on the DOMInputSource.
        return source
class DOMInputSource(object):
    """Description of a document to load: streams, encoding and identifiers.

    Every attribute defaults to None.  The defaults live on the private
    names because the public names are properties that read them.
    """

    _byteStream = None
    _characterStream = None
    _encoding = None
    _publicId = None
    _systemId = None

    def _get_byteStream(self):
        return self._byteStream

    def _set_byteStream(self, byteStream):
        self._byteStream = byteStream

    byteStream = property(_get_byteStream, _set_byteStream)

    def _get_characterStream(self):
        return self._characterStream

    def _set_characterStream(self, characterStream):
        self._characterStream = characterStream

    characterStream = property(_get_characterStream, _set_characterStream)

    def _get_encoding(self):
        return self._encoding

    def _set_encoding(self, encoding):
        self._encoding = encoding

    encoding = property(_get_encoding, _set_encoding)

    def _get_publicId(self):
        return self._publicId

    def _set_publicId(self, publicId):
        self._publicId = publicId

    publicId = property(_get_publicId, _set_publicId)

    def _get_systemId(self):
        return self._systemId

    def _set_systemId(self, systemId):
        self._systemId = systemId

    systemId = property(_get_systemId, _set_systemId)
class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.
    """

    # This class is not strictly needed; a concrete filter only has to
    # provide a suitable endElement() method.

    def endElement(self, element):
        """Default hook: does nothing with element and returns 1."""
        # It is unclear why a return value is expected at all; the
        # result doesn't appear to be used.
        return 1
def _name_xform(name):
return string.replace(string.lower(name), '-', '_')
=== Added File zopeproducts/xml/dom/saxbuilder.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
XML DOM Implementation
Create a DOM tree using a SAX parser.
$Id: saxbuilder.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
import os
import urllib
import xml.sax
class SAXBuilder(xml.sax.ContentHandler):
    """SAX content handler that builds a DOM tree from parse events.

    documentFactory must provide createDocument(namespaceURI,
    qualifiedName, doctype); the finished tree is left in self.document.

    NOTE(review): self.document only exists after the first
    start-element event, so a comment or processing instruction that
    arrives before the root element fails on self.document being None.
    """

    # The "xml" prefix is bound implicitly and is never announced
    # through startPrefixMapping(), so the context starts out with it.
    _XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    _locator = None
    document = None
    documentElement = None
    curNode = None

    def __init__(self, documentFactory=None):
        self.documentFactory = documentFactory
        # contains uri -> prefix dicts
        self._ns_contexts = [{self._XML_NAMESPACE: "xml"}]
        self._current_context = self._ns_contexts[-1]

    def install(self, parser):
        """Register this builder as parser's content handler."""
        parser.setContentHandler(self)

    def setDocumentLocator(self, locator):
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        # Keep a snapshot of the context as it was before this mapping,
        # then record the mapping in the live context.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts.pop()

    def _make_qname(self, uri, localname, tagname):
        """Return a qualified name for (uri, localname).

        When using namespaces, the reader may or may not provide us
        with the original name.  If not, create *a* valid tagName from
        the current context.
        """
        if uri:
            if tagname is None:
                prefix = self._current_context.get(uri)
                if prefix:
                    tagname = "%s:%s" % (prefix, localname)
                else:
                    tagname = localname
        else:
            tagname = localname
        return tagname

    def startElementNS(self, name, tagName, attrs):
        uri, localname = name
        tagName = self._make_qname(uri, localname, tagName)
        if self.document is None:
            factory = self.documentFactory
            self.document = factory.createDocument(uri or None, tagName, None)
            node = self.document.documentElement
        else:
            if uri:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.document.createElement(localname)
            self.curNode.appendChild(node)
        self.curNode = node

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                # Same naming rule as for elements: no "None:" prefix for
                # a default-namespace URI, no KeyError for an unknown one.
                qname = self._make_qname(a_uri, a_localname, None)
                node.setAttributeNS(a_uri, qname, value)
            else:
                node.setAttribute(a_localname, value)

    def endElementNS(self, name, tagName):
        self.curNode = self.curNode.parentNode

    def startElement(self, name, attrs):
        if self.documentElement is None:
            factory = self.documentFactory
            self.document = factory.createDocument(None, name, None)
            node = self.document.documentElement
            # used only as a "root has been created" flag
            self.documentElement = 1
        else:
            node = self.document.createElement(name)
            self.curNode.appendChild(node)
        self.curNode = node

        for aname, value in attrs.items():
            node.setAttribute(aname, value)

    def endElement(self, name):
        self.curNode = self.curNode.parentNode

    def comment(self, s):
        node = self.document.createComment(s)
        self.curNode.appendChild(node)

    def processingInstruction(self, target, data):
        node = self.document.createProcessingInstruction(target, data)
        self.curNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)

    def characters(self, chars):
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)
def parse(file, namespaces=1, dom=None, parser=None):
    """Parse file with a SAX parser and return the DOM document built.

    file -- a file name, a URL, or an open file object.  A name that
        cannot be opened locally because it does not exist is retried
        through urllib.
    namespaces -- value for the SAX namespaces feature.
    dom -- DOM implementation used to create the document; defaults to
        core.theDOMImplementation.
    parser -- SAX parser to use; defaults to xml.sax.make_parser().

    A stream opened here is closed again even when parsing fails; a
    file object passed in is left open for the caller.
    """
    import errno
    import sys

    if not parser:
        parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, namespaces)
    if not dom:
        import core
        dom = core.theDOMImplementation

    if isinstance(file, type('')):
        try:
            fp = open(file)
        except IOError:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause, whose syntax differs between Python
            # versions.
            if sys.exc_info()[1].errno != errno.ENOENT:
                raise
            fp = urllib.urlopen(file)
            systemId = file
        else:
            # Ugh! Why doesn't urllib.pathname2url() do something useful?
            systemId = "file://" + os.path.abspath(file)
    else:
        fp = file
        try:
            systemId = file.name
        except AttributeError:
            systemId = None

    try:
        source = xml.sax.InputSource()
        source.setByteStream(fp)
        source.setSystemId(systemId)
        builder = SAXBuilder(documentFactory=dom)
        builder.install(parser)
        parser.parse(source)
    finally:
        if fp is not file:
            fp.close()
    return builder.document
=== Added File zopeproducts/xml/dom/traversal.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
XML DOM Implementation
DOM Level 2 Traversal, based on the W3C recommendation at:
http://www.w3.org/TR/DOM-Level-2-Traversal-Range/
$Id: traversal.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
# This code could be sped up.
# - uses DOM methods, could use implementation internals, esp. childNodes
# - if we had mutation events, NodeIterator could build an array as it
# iterated, and move over that, only updating on mutation
import xml.dom
from zope.interface import implements
from zopeproducts.xml.interfaces.dom.traversal import \
INodeFilter, INodeIterator, ITreeWalker
# Names exported by "from traversal import *".
__all__ = [
"NodeFilter",
"NodeIterator",
"TreeWalker",
]
class NodeFilter:
    """Default node filter and holder of the traversal constants.

    The FILTER_* values are the possible results of acceptNode(); the
    SHOW_* values are the bit flags combined into a whatToShow mask.
    """

    implements(INodeFilter)

    # Constants returned by acceptNode():
    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3

    # Constants for whatToShow:
    SHOW_ALL = 0xFFFFFFFF
    SHOW_ELEMENT = 0x00000001
    SHOW_ATTRIBUTE = 0x00000002
    SHOW_TEXT = 0x00000004
    SHOW_CDATA_SECTION = 0x00000008
    SHOW_ENTITY_REFERENCE = 0x00000010
    SHOW_ENTITY = 0x00000020
    SHOW_PROCESSING_INSTRUCTION = 0x00000040
    SHOW_COMMENT = 0x00000080
    SHOW_DOCUMENT = 0x00000100
    SHOW_DOCUMENT_TYPE = 0x00000200
    SHOW_DOCUMENT_FRAGMENT = 0x00000400
    SHOW_NOTATION = 0x00000800

    def acceptNode(self, node):
        """Accept every node; override this to filter."""
        return NodeFilter.FILTER_ACCEPT
# Pairs of (node type, whatToShow flag).  AccessorBase.__init__ uses this
# table to decode a whatToShow mask into a list indexed by node type.
_whatToShow_bits = (
(xml.dom.Node.ELEMENT_NODE, NodeFilter.SHOW_ELEMENT),
(xml.dom.Node.ATTRIBUTE_NODE, NodeFilter.SHOW_ATTRIBUTE),
(xml.dom.Node.TEXT_NODE, NodeFilter.SHOW_TEXT),
(xml.dom.Node.CDATA_SECTION_NODE, NodeFilter.SHOW_CDATA_SECTION),
(xml.dom.Node.ENTITY_REFERENCE_NODE, NodeFilter.SHOW_ENTITY_REFERENCE),
(xml.dom.Node.ENTITY_NODE, NodeFilter.SHOW_ENTITY),
(xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
NodeFilter.SHOW_PROCESSING_INSTRUCTION),
(xml.dom.Node.COMMENT_NODE, NodeFilter.SHOW_COMMENT),
(xml.dom.Node.DOCUMENT_NODE, NodeFilter.SHOW_DOCUMENT),
(xml.dom.Node.DOCUMENT_TYPE_NODE, NodeFilter.SHOW_DOCUMENT_TYPE),
(xml.dom.Node.DOCUMENT_FRAGMENT_NODE, NodeFilter.SHOW_DOCUMENT_FRAGMENT),
(xml.dom.Node.NOTATION_NODE, NodeFilter.SHOW_NOTATION),
)
class AccessorBase:
    """Shared machinery for NodeIterator and TreeWalker.

    Holds the read-only traversal settings (root, whatToShow, filter,
    expandEntityReferences) and the helpers that find the next or
    previous visible node without leaving the subtree under root.

    Attribute assignment is routed through __setattr__, so the class
    stores its own state directly in self.__dict__.  Subclasses must
    provide _filterNode(node).
    """

    def __init__(self, root, whatToShow, filter, entityReferenceExpansion):
        if root is None:
            raise xml.dom.NotSupportedErr(
                "root of traversal object can't be None")
        d = self.__dict__
        d['root'] = root
        d['whatToShow'] = whatToShow
        d['filter'] = filter
        d['expandEntityReferences'] = entityReferenceExpansion
        #
        # Decode the whatToShow flags for faster tests; the W3C
        # reserves the first 200 NodeType values, but the whatToShow
        # flags have to fit in 32 bits (leave slot 0 empty since it's
        # not a valid NodeType).
        #
        d['_whatToShow'] = what = [0] * 33
        for nodeType, bit in _whatToShow_bits:
            what[nodeType] = whatToShow & bit

    def __setattr__(self, name, value):
        """Allow assignment only through a _set_<name> method.

        Raises xml.dom.NoModificationAllowedErr for an attribute that
        only has a getter, AttributeError for an unknown attribute.
        """
        setter = getattr(self, '_set_' + name, None)
        if setter is None:
            getter = getattr(self, '_get_' + name, None)
            if getter:
                raise xml.dom.NoModificationAllowedErr(
                    "read-only attribute: " + repr(name))
            else:
                raise AttributeError("no such attribute: " + repr(name))
        setter(value)

    def _get_root(self):
        return self.root

    def _get_whatToShow(self):
        return self.whatToShow

    def _get_filter(self):
        # the filter given to the constructor, not the builtin filter()
        return self.filter

    def _get_expandEntityReferences(self):
        return self.expandEntityReferences

    def _should_show(self, node):
        """Return the NodeFilter.FILTER_* verdict for node."""
        if not self._whatToShow[node.nodeType]:
            return NodeFilter.FILTER_SKIP
        if (node.nodeType == xml.dom.Node.ENTITY_REFERENCE_NODE
                and not self.expandEntityReferences):
            return NodeFilter.FILTER_REJECT
        if self.filter is not None:
            return self._filterNode(node)
        return NodeFilter.FILTER_ACCEPT

    def _nextInTree(self, node):
        """Return first visible node in node's subtree, or None."""
        # check given node first; ask only once so that a user filter
        # is not invoked twice for the same node
        verdict = self._should_show(node)
        if verdict == NodeFilter.FILTER_ACCEPT:
            return node
        if verdict == NodeFilter.FILTER_REJECT:
            return None
        for c in node.childNodes:
            child = self._nextInTree(c)
            if child is not None:
                return child
            if c.isSameNode(self.root):     # don't leave root subtree
                return None
        return None

    def _lastInTree(self, node):
        """Return last visible node in node's subtree, or None."""
        verdict = self._should_show(node)
        if verdict == NodeFilter.FILTER_REJECT:
            return None
        childNodes = node.childNodes
        for index in range(childNodes.length - 1, -1, -1):
            childNode = childNodes[index]
            child = self._lastInTree(childNode)
            if child is not None:
                return child
            if childNode.isSameNode(self.root):  # don't leave root subtree
                return None
        # subtree exhausted, check given node
        if verdict == NodeFilter.FILTER_ACCEPT:
            return node
        return None

    # we don't do any visibility tests here, _nextInTree does.
    def _nextNode(self, startNode):
        """Return next visible node after startNode, or None."""
        root = self.root
        # check children
        for child in startNode.childNodes:
            node = self._nextInTree(child)
            if node is not None:
                return node
            if child.isSameNode(root):      # don't leave root subtree
                return None
        if startNode.isSameNode(root):
            # siblings and ancestors of the root lie outside its subtree
            return None
        # check next siblings
        sib = startNode.nextSibling
        while sib is not None:
            node = self._nextInTree(sib)
            if node is not None:
                return node
            if sib.isSameNode(root):        # don't leave root subtree
                return None
            sib = sib.nextSibling
        # check ancestors' next siblings; don't visit ancestors
        ancestor = startNode.parentNode
        while ancestor is not None:
            if ancestor.isSameNode(root):
                # the root's own siblings are outside the subtree
                return None
            sib = ancestor.nextSibling
            while sib is not None:
                node = self._nextInTree(sib)
                if node is not None:
                    return node
                if sib.isSameNode(root):    # don't leave root subtree
                    return None
                sib = sib.nextSibling
            # no visible nodes in siblings or subtrees of this ancestor
            ancestor = ancestor.parentNode
        return None

    # we *do* a visibility test here, _lastInTree does too.
    def _previousNode(self, startNode):
        """Return the previous visible node before startNode, or None."""
        root = self.root
        if startNode.isSameNode(root):
            # nothing inside the subtree precedes its root
            return None
        # check previous siblings
        sib = startNode.previousSibling
        while sib is not None:
            node = self._lastInTree(sib)
            if node is not None:
                return node
            if sib.isSameNode(root):        # don't leave root subtree
                return None
            sib = sib.previousSibling
        # check ancestors, then ancestors' previous siblings
        ancestor = startNode.parentNode
        while ancestor is not None:
            if self._should_show(ancestor) == NodeFilter.FILTER_ACCEPT:
                return ancestor
            if ancestor.isSameNode(root):
                # the root's own siblings are outside the subtree
                return None
            sib = ancestor.previousSibling
            while sib is not None:
                node = self._lastInTree(sib)
                if node is not None:
                    return node
                if sib.isSameNode(root):    # don't leave root subtree
                    return None
                sib = sib.previousSibling
            ancestor = ancestor.parentNode
        return None
# Since we don't need to know about structure, we could probably be a lot
# faster if we kept a list of nodes in document order and updated
# it when we got a mutation event - once we have mutation events.
class NodeIterator(AccessorBase):
    """Flat, document-order iteration over the visible nodes under root.

    The position is kept as a reference node (_refNode) plus a flag
    (_refPos) telling on which side of that node the iterator sits.
    """

    implements(INodeIterator)

    BEFORE_NODE = 1  # iterator crossed reference node moving forward
    AFTER_NODE = 0   # iterator crossed reference node moving backward

    def __init__(self, root, whatToShow=NodeFilter.SHOW_ALL, filter=None,
                 entityReferenceExpansion=1):
        AccessorBase.__init__(self, root, whatToShow,
                              filter, entityReferenceExpansion)
        self.__dict__['_refNode'] = None
        self.__dict__['_refPos'] = NodeIterator.BEFORE_NODE

    def detach(self):
        """Invalidate the iterator; later iteration raises InvalidStateErr."""
        self.__dict__['root'] = None

    def nextNode(self):
        """Return the next visible node, or None when there is none.

        Raises xml.dom.InvalidStateErr on a detached iterator.
        """
        if self.root is None:
            raise xml.dom.InvalidStateErr(
                "can't iterate using a detached NodeIterator")
        state = self.__dict__
        if self._refNode is None:
            # first step: start at the root itself
            state['_refNode'] = self.root
            state['_refPos'] = NodeIterator.AFTER_NODE
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                return self._refNode
        elif self._refPos == NodeIterator.BEFORE_NODE:
            # sitting just before the reference node: step over it
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                state['_refPos'] = NodeIterator.AFTER_NODE
                return self._refNode

        node = AccessorBase._nextNode(self, self._refNode)
        if node is not None:
            state['_refNode'] = node
            state['_refPos'] = NodeIterator.AFTER_NODE
        return node

    def previousNode(self):
        """Return the previous visible node, or None when there is none.

        Raises xml.dom.InvalidStateErr on a detached iterator.
        """
        if self.root is None:
            raise xml.dom.InvalidStateErr(
                "can't iterate using a detached NodeIterator")
        state = self.__dict__
        if self._refNode is None:
            state['_refNode'] = self.root
            state['_refPos'] = NodeIterator.BEFORE_NODE
        elif self._refPos == NodeIterator.AFTER_NODE:
            # sitting just after the reference node: step back over it
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                state['_refPos'] = NodeIterator.BEFORE_NODE
                return self._refNode

        node = AccessorBase._previousNode(self, self._refNode)
        if node is not None:
            state['_refNode'] = node
            state['_refPos'] = NodeIterator.BEFORE_NODE
        return node

    def __getitem__(self, index):
        """Return the next node; index itself is ignored.

        Raises IndexError once the iteration is exhausted.
        """
        node = self.nextNode()
        if node is None:
            raise IndexError("NodeIterator index out of range")
        return node

    def _filterNode(self, node):
        """Return what the filter says to do with node,
        translating reject into skip"""
        filterAction = self.filter.acceptNode(node)
        if filterAction == NodeFilter.FILTER_REJECT:
            return NodeFilter.FILTER_SKIP
        return filterAction
class TreeWalker(AccessorBase):
    """Structure-aware navigation over the visible nodes under root.

    Each navigation method moves currentNode to the node it returns;
    when it finds nothing it returns None and leaves currentNode alone.
    """

    implements(ITreeWalker)

    def __init__(self, root, whatToShow=NodeFilter.SHOW_ALL, filter=None,
                 entityReferenceExpansion=1):
        AccessorBase.__init__(self, root, whatToShow,
                              filter, entityReferenceExpansion)
        self.__dict__['currentNode'] = root

    def _get_currentNode(self):
        return self.currentNode

    def _set_currentNode(self, node):
        if node is None:
            raise xml.dom.NotSupportedErr("can't set current node to None")
        self.__dict__['currentNode'] = node

    def _moveTo(self, node):
        """Make node current unless it is None; return node either way."""
        if node is not None:
            self.__dict__['currentNode'] = node
        return node

    def _firstAccepted(self, node, link):
        """Follow the link attribute from node to the first accepted node.

        Moves there and returns it, or returns None if the chain ends.
        """
        while node is not None:
            if self._should_show(node) == NodeFilter.FILTER_ACCEPT:
                break
            node = getattr(node, link)
        return self._moveTo(node)

    def parentNode(self):
        if self.root.isSameNode(self.currentNode):
            return None
        candidate = self.currentNode.parentNode
        while candidate is not None:
            if self._should_show(candidate) == NodeFilter.FILTER_ACCEPT:
                break
            if candidate.isSameNode(self.root):
                # can't step any further up
                return None
            candidate = candidate.parentNode
        return self._moveTo(candidate)

    def firstChild(self):
        return self._firstAccepted(self.currentNode.firstChild,
                                   'nextSibling')

    def lastChild(self):
        return self._firstAccepted(self.currentNode.lastChild,
                                   'previousSibling')

    # the rec doesn't say that *Sibling should pay attention to root!
    def previousSibling(self):
        return self._firstAccepted(self.currentNode.previousSibling,
                                   'previousSibling')

    def nextSibling(self):
        return self._firstAccepted(self.currentNode.nextSibling,
                                   'nextSibling')

    # TreeWalkers don't move if there is no visible next or previous,
    # so we do nothing for a None return.
    def nextNode(self):
        found = AccessorBase._nextNode(self, self.currentNode)
        if found:
            self.__dict__['currentNode'] = found
        return found

    def previousNode(self):
        found = AccessorBase._previousNode(self, self.currentNode)
        if found:
            self.__dict__['currentNode'] = found
        return found

    def _filterNode(self, node):
        """Return the filter's verdict for node unchanged."""
        return self.filter.acceptNode(node)
=== Added File zopeproducts/xml/dom/xmlextended.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""
Acquisition-based implementation of the DOM 'XML' feature classes.
See the W3C DOM specification for more information: http://www.w3.org/DOM/
$Id: xmlextended.py,v 1.1 2003/06/20 15:11:38 philikon Exp $
"""
import core
from core import DOMProperty
import xml.dom
from zope.interface import implements
from zope.app.context import ContextWrapper
from zopeproducts.xml.interfaces.dom.xmlextended import \
ICDATASection, IDocumentType, INotation, IEntity, \
IEntityReference, IProcessingInstruction
class CDATASection(core.Text):
    """Text node variant for CDATA sections; only the node identity differs."""

    implements(ICDATASection)

    _nodeName = "#cdata-section"
    _nodeType = core.Node.CDATA_SECTION_NODE
class Identified:
    """Mix-in providing read-only publicId and systemId attributes."""

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers; called from the host class's __init__."""
        self._publicId = publicId
        self._systemId = systemId

    def _get_publicId(self):
        return self._publicId

    def _get_systemId(self):
        return self._systemId

    publicId = DOMProperty(_get_publicId)
    systemId = DOMProperty(_get_systemId)
class Entity(Identified, core.Parentless, core.Node):
    """DOM Entity node: read-only, parentless, identified by public/system id."""

    implements(IEntity)

    _nodeType = core.Node.ENTITY_NODE
    _readonly = True
    _in_tree = False
    _allowed_child_types = (core.Node.ELEMENT_NODE,
                            core.Node.PROCESSING_INSTRUCTION_NODE,
                            core.Node.COMMENT_NODE,
                            core.Node.TEXT_NODE,
                            core.Node.CDATA_SECTION_NODE,
                            core.Node.ENTITY_REFERENCE_NODE)

    def __init__(self, name, publicId, systemId, notationName):
        self._identified_mixin_init(publicId, systemId)
        self._nodeName = name
        self._notationName = notationName

    def _cloneNode(self, deep, mutable, document):
        # force children to not to acquire mutability:
        return core.Node._cloneNode(self, deep, False, document)

    def _get_notationName(self):
        return self._notationName

    notationName = DOMProperty(_get_notationName)

    # DOM Level 3 (Working Draft, 01 Sep 2000)
    # I expect some or all of these will become read-only before the
    # recommendation is finished.
    _actualEncoding = None
    _encoding = None
    _version = None

    def _get_actualEncoding(self):
        return self._actualEncoding

    def _set_actualEncoding(self, value):
        self._actualEncoding = value

    actualEncoding = property(_get_actualEncoding, _set_actualEncoding)

    def _get_encoding(self):
        return self._encoding

    def _set_encoding(self, value):
        self._encoding = value  # XXX

    encoding = property(_get_encoding, _set_encoding)

    def _get_version(self):
        # Read the backing attribute; reading self.version here would
        # re-enter this getter through the property.
        return self._version

    def _set_version(self, value):
        # Single underscore: a double-underscore name would be mangled
        # and never be seen by the getter.
        self._version = value

    version = property(_get_version, _set_version)
class EntityReference(core.Node):
    """DOM EntityReference node: a read-only node named after its entity."""

    implements(IEntityReference)

    _nodeType = core.Node.ENTITY_REFERENCE_NODE
    _readonly = True
    _allowed_child_types = (
        core.Node.ELEMENT_NODE,
        core.Node.PROCESSING_INSTRUCTION_NODE,
        core.Node.COMMENT_NODE,
        core.Node.TEXT_NODE,
        core.Node.CDATA_SECTION_NODE,
        core.Node.ENTITY_REFERENCE_NODE,
    )

    def __init__(self, name):
        """Create a reference called name; it starts outside any tree."""
        self._in_tree = False
        self._nodeName = name
class Notation(Identified, core.Childless, core.Parentless, core.Node):
    """DOM Notation node: read-only, childless and parentless."""

    implements(INotation)

    _nodeType = core.Node.NOTATION_NODE
    _readonly = True

    def __init__(self, name, publicId, systemId):
        """Create a notation called name; it starts outside any tree."""
        self._identified_mixin_init(publicId, systemId)
        self._in_tree = False
        self._nodeName = name

    def _cloneNode(self, deep, mutable, document):
        # The requested mutability is ignored: clones are always made
        # with mutable=False.
        return core.Node._cloneNode(self, deep, False, document)

    # DOM Level 3 (working draft, 5 June 2001)
    def _get_textContent(self):
        return ''

    # NOTE(review): textContent is a plain class attribute here, not a
    # DOMProperty built from the getter above - confirm that is intended.
    textContent = ''
class ProcessingInstruction(core.Childless, core.Node):
    """DOM ProcessingInstruction node holding a target and its data."""

    implements(IProcessingInstruction)

    _nodeType = core.Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        """Store target (also the node name) and data (also the node value)."""
        self._in_tree = False
        self._nodeName = self._target = target
        self._nodeValue = self._data = data

    def _get_data(self):
        """Return the instruction's data."""
        return self._data

    def _set_data(self, data):
        """Replace the instruction's data.

        Raises xml.dom.NoModificationAllowedErr when the node is
        read-only.  _changed() is only invoked when the new value
        differs from the stored one.
        """
        if self._readonly:
            raise xml.dom.NoModificationAllowedErr()
        if self._data != data:
            # Keep _data and _nodeValue in step with each other.
            self._data = self._nodeValue = data
            self._changed()

    data = DOMProperty(_get_data, _set_data)

    # nodeValue is an alias for data: same getter, same setter.
    _get_nodeValue = _get_data
    _set_nodeValue = _set_data
    nodeValue = DOMProperty(_get_nodeValue, _set_nodeValue)

    def _get_target(self):
        """Return the instruction's target."""
        return self._target

    target = DOMProperty(_get_target)

    # DOM Level 3 (working draft, 5 June 2001)

    def _get_textContent(self):
        """Return the node value as the text content."""
        return self._nodeValue

    textContent = DOMProperty(_get_textContent)
class DocumentType(Identified, core.Childless, core.Node):
    """DOM DocumentType node carrying entity and notation lists."""

    implements(IDocumentType)

    _nodeType = core.Node.DOCUMENT_TYPE_NODE
    _nodeValue = None
    _internalSubset = None

    def __init__(self, qualifiedName, publicId, systemId):
        """Initialise identifiers, names and empty entity/notation lists."""
        self._identified_mixin_init(publicId, systemId)
        self._name = self._nodeName = qualifiedName
        self._entities = []
        self._notations = []
        self._in_tree = False

    def _get_internalSubset(self):
        """Return the stored internal subset (None by default)."""
        return self._internalSubset

    internalSubset = DOMProperty(_get_internalSubset)

    def _get_name(self):
        """Return the qualified name given at construction."""
        return self._name

    name = DOMProperty(_get_name)

    # nodeName shares the getter of name.
    _get_nodeName = _get_name
    nodeName = DOMProperty(_get_nodeName)

    def _get_nodeValue(self):
        """Always return None."""
        return None  # XXX ?

    def _set_nodeValue(self, data):
        """Accept and discard data; assignment has no effect."""
        pass

    nodeValue = DOMProperty(_get_nodeValue, _set_nodeValue)

    def _get_entities(self):
        """Return a fresh OwnedEntityMap over the '_entities' list."""
        return OwnedEntityMap(self, '_entities')

    entities = DOMProperty(_get_entities)

    def _get_notations(self):
        """Return a fresh OwnedEntityMap over the '_notations' list."""
        return OwnedEntityMap(self, '_notations')

    notations = DOMProperty(_get_notations)

    def isSupported(self, feature, version):
        """Ask the relevant DOM implementation whether feature is supported.

        The owner document's implementation is consulted when there is
        an owner document; otherwise core.theDOMImplementation is used.
        """
        owner = self.ownerDocument
        if not owner:
            return core.theDOMImplementation.hasFeature(feature, version)
        return owner.implementation.hasFeature(feature, version)

    # DOM Level 3 (working draft, 5 June 2001)

    def _get_textContent(self):
        """Return the empty string."""
        return ''

    # Plain class attribute (not a property); always the empty string.
    textContent = ''
class OwnedEntityMap(core.MapFromParent):
    """
    Read-only NamedNodeMap over the entity or notation list of a
    DocumentType.  Every mutating hook raises
    xml.dom.NoModificationAllowedErr.
    """

    def __init__(self, parent, listName):
        """Bind the map to parent and remember which list it exposes."""
        core.MapFromParent.__init__(self, parent)
        self._parentListName = listName

    def _item_helper(self, itemSource):
        """Return itemSource wrapped in a ContextWrapper for the parent."""
        # XXX is ownerDocument ok with this?  The original carried a
        # disabled alternative that stored the parent directly as the
        # item's ownerDocument.
        wrapped = ContextWrapper(itemSource, self._parent)
        return wrapped

    def _nameMatcher(self, itemSource, name):
        """Tell whether itemSource's nodeName equals name."""
        return itemSource.nodeName == name

    def _nsMatcher(self, itemSource, namespaceURI, localName):
        """Tell whether itemSource matches both namespace URI and local name."""
        return (itemSource.namespaceURI == namespaceURI
                and itemSource.localName == localName)

    def _set_named_item(self, name, matcher, node):
        """Refuse the modification."""
        raise xml.dom.NoModificationAllowedErr()

    def _delFromParentList(self, entities, i):
        """Refuse the modification."""
        raise xml.dom.NoModificationAllowedErr()

    def _addToParentList(self, entities, node):
        """Refuse the modification."""
        raise xml.dom.NoModificationAllowedErr()

    def _key_helper(self, itemSource):
        """Return the mapping key: 'prefix:localName', or just localName."""
        if not itemSource.prefix:
            return itemSource.localName
        return "%s:%s" % (itemSource.prefix, itemSource.localName)