[Zope3-checkins] CVS: zopeproducts/xmldom - README.txt:1.1 __init__.py:1.1 core.py:1.1 exceptions.py:1.1 expatbuilder.py:1.1 loadsave.py:1.1 saxbuilder.py:1.1 traversal.py:1.1 xmlextended.py:1.1
Martijn Faassen
m.faassen@vet.uu.nl
Sat, 7 Jun 2003 06:13:52 -0400
Update of /cvs-repository/zopeproducts/xmldom
In directory cvs.zope.org:/tmp/cvs-serv1893
Added Files:
README.txt __init__.py core.py exceptions.py expatbuilder.py
loadsave.py saxbuilder.py traversal.py xmlextended.py
Log Message:
Added beginning of Zope 3 port of ParsedXML's DOM component. See README.txt
for more details.
=== Added File zopeproducts/xmldom/README.txt ===
Documentation
=============
xmldom implements an XML DOM; the DOM standard is the right
place to find documentation:
http://www.w3.org/DOM/
xmldom implements DOM Level 2 (in particular Core and, with known bugs,
parts of Traversal), with some experimental Level 3 extensions.
http://www.w3.org/DOM/DOMTR#dom2
http://www.w3.org/DOM/DOMTR#dom3
Example
=======
from zopeproducts.xmldom import expatbuilder
from StringIO import StringIO
xml = '<doc>Hello world!</doc>'
f = StringIO(xml)
dom = expatbuilder.parse(f)
Background
==========
xmldom is derived from the Zope 2 ParsedXML product. It only contains
that product's DOM implementation and tests, not any of the Zope
integration (which would be very different in Zope 3 anyway). It has
been changed to work with Zope 3:
* now uses ContextWrappers instead of Zope 2 explicit acquisition.
* uses Python 2.2 properties instead of custom __getattr__ based
access handlers. All kinds of __dict__ workarounds have also been
removed.
* module names are in lower case.
* relicensed to ZPL 2.0.
It passes the (extensive) DOM unit tests which are also included.
Problems
========
The code does not yet contain any Zope3 interface declarations. These
should be added.
The code is extremely grotty in many places. This is in part due to
the requirements of XML conformance and the DOM standard in particular
(which doesn't excel in clean design). It does pass the tests,
however.
Traversal implementation seems to be buggy.
It still exhibits the same fundamental problem as ParsedXML does
concerning ContextWrappers; for more information, see
tests/test_contextwrapperpain.py.
=== Added File zopeproducts/xmldom/__init__.py ===
# this is a package
=== Added File zopeproducts/xmldom/core.py === (2153/2253 lines abridged)
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
__metaclass__ = type
import string
import xml.dom
from zope.context import ContextAware, getWrapperContext,\
getWrapperContainer, getbaseobject, ContainmentIterator
from zope.context.wrapper import setcontext
from zope.app.context import ContextWrapper
# legal qualified name pattern, from PyXML xml/dom/Document.py
# see http://www.w3.org/TR/REC-xml-names/#NT-QName
# we don't enforce namespace usage if using namespaces, which basically
# means that we don't disallow a leading ':'
# XXX there's more to the world than ascii a-z
# FIXME: should allow combining characters: fix when Python gets Unicode
import re
_ok_qualified_name = re.compile('[a-zA-Z_:][\w\.\-_:]*\Z').match
_TupleType = type(())
_StringTypes = (type(''), type(unicode('')))
# http://www.w3.org/TR/1999/REC-xml-names-19990114/#ns-qualnames
def _check_qualified_name(name, uri='ok'):
"test name for well-formedness"
if _ok_qualified_name(name) is not None:
if ":" in name:
parts = name.split(':')
if len(parts) != 2:
raise xml.dom.NamespaceErr("malformed qualified name")
if not (parts[0] and parts[1]):
raise xml.dom.NamespaceErr("malformed qualified name")
if not uri:
raise xml.dom.NamespaceErr("no namespace URI for prefix")
return 1
[-=- -=- -=- 2153 lines omitted -=- -=- -=-]
break
if oldNode is None:
self._addToParentList(attributes, node)
node._set_owner_element(self._parent)
return oldNode
def _delFromParentList(self, attrs, i):
"workhorse for __delitem__; remove ith item from attrs"
del attrs[i] #XXX ownerElement needs to be updated in other refs
self._parent._changed()
def _addToParentList(self, attrs, node):
if self._parent._attributes:
self._parent._attributes.append(node._item)
else:
self._parent._attributes = [node._item]
self._parent._changed()
def _key_helper(self, itemSource):
    """Return the mapping key for ``itemSource``: its ``_ATTR_NAME`` slot."""
    key = itemSource[_ATTR_NAME]
    return key
# Utility functions for Attrs, used by more than the Attr class.
def _attr_item_match_name(item, name,
                          _ATTR_NAME=_ATTR_NAME):
    """AttributeMap helper: true when ``item``'s name slot equals ``name``.

    ``_ATTR_NAME`` is bound as a default argument, so inside the
    function the index is a local name.
    """
    item_name = item[_ATTR_NAME]
    return item_name == name
def _attr_item_match_ns(item, (namespaceURI, localName),
_ATTR_NS=_ATTR_NS, _ATTR_LOCALNAME=_ATTR_LOCALNAME):
"utility function for AttributeMap; return true if name matches item"
return (item[_ATTR_LOCALNAME] == localName
and item[_ATTR_NS] == namespaceURI)
def _attr_get_value(nodes):
"utility function to get attr value; concatenate values of list of nodes"
L = []
for node in nodes:
L.append(node.nodeValue)
return ''.join(filter(None, L))
def _attr_set_value(item, value):
    """Safely set the shared value of an attr item to a single Text node.

    The list stored at ``item[_ATTR_VALUE]`` is emptied and refilled in
    place (not rebound), so other holders of that list see the new value.
    """
    text = Text(value)
    del text._in_tree
    values = item[_ATTR_VALUE]
    while values:
        values.pop()
    values.append(text)
=== Added File zopeproducts/xmldom/exceptions.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Replacement DOM exceptions to be used if the xml.dom package is not
available.
"""
"""
Why does this module exist?
The Python DOM API defines exceptions that DOM implementations
should use to allow DOM client code to detect errors that can
occur during processing. Since not all client code knows about
the DOM implementation used, all implementations must use shared
exceptions. These are defined in the xml.dom package (in the
package's __init__.py). The xml.dom package is provided as part
of PyXML and Python 2.0.
Since ParsedXML may be used from Python 1.5.2 without having PyXML
or a more recent version of Python available, we need to provide
an alternate implementation. However, DOM client code that works
on DOM instances created elsewhere will still expect to get the
exception classes from xml.dom. Since the code may be part of
third-party packages that know nothing of ParsedXML or Zope, we
need to provide an implementation of xml.dom if it doesn't already
exist.
So how does this module solve the problem?
This module defines the required exception objects and constants
and 'installs' the values in the xml.dom module if they are not
already present. Since the xml.dom module may not exist, or may
pre-date the addition of these exceptions to the standard
implementation (Python 2.0 or PyXML 0.6.2), the modules xml and
xml.dom are created by surgically adding them to sys.modules if
needed, and inserting the required values into an existing xml.dom
module if needed.
This works because of the way the module import machinery works in
Python. sys.modules is a mapping from module name to module
object; sys.modules['sys'] evaluates to the sys module object.
When an import statement is executed, the Python runtime first
looks in sys.modules to retrieve an already-loaded module. The
set of built-in modules and the filesystem are only consulted if
the module has not already been loaded. For modules in packages
(xml.dom), each level of enclosing package is checked before
attempting to load the module; i.e., xml is checked before
xml.dom. This machinery is invoked each time an import is
attempted.
When ParsedXML.DOM is imported, it imports this module. This
first attempts to load the standard xml.dom package. If that
fails (which it is likely to do for Python 1.5.2 without PyXML
installed), this module is an acceptable implementation of
xml.dom, but we still need the xml package. This is created
artificially using the new.module() function and inserted in
sys.modules. Once this is done, this module may be inserted for
the key 'xml.dom', after which attempts to import xml.dom will
provide this module.
If xml.dom is already available, but older than the introduction
of DOMException and its specializations, the implementations
defined here are inserted into it, so that it is extended to match
the more recent version of the interface definition.
What are the limitations of this approach?
Some versions of PyXML may have defined DOMException without
defining the subclasses. The specialized versions of DOMException
were added in PyXML version 0.6.3 (XXX ??). Versions which
contain DOMException but not the specializations will not be
compatible with this module. This should not be a substantial
limitation in the context of Zope.
There is no way to protect against code that imports xml.dom
before ParsedXML.DOM has been imported. Such code will receive an
ImportError. Reloading that code after ParsedXML.DOM is imported
will cause it to work properly.
"""
# These have to be in order:
# the numeric code bound to each name below is its position in this
# list, counting from 1.
_CODE_NAMES = [
    "INDEX_SIZE_ERR",
    "DOMSTRING_SIZE_ERR",
    "HIERARCHY_REQUEST_ERR",
    "WRONG_DOCUMENT_ERR",
    "INVALID_CHARACTER_ERR",
    "NO_DATA_ALLOWED_ERR",
    "NO_MODIFICATION_ALLOWED_ERR",
    "NOT_FOUND_ERR",
    "NOT_SUPPORTED_ERR",
    "INUSE_ATTRIBUTE_ERR",
    "INVALID_STATE_ERR",
    "SYNTAX_ERR",
    "INVALID_MODIFICATION_ERR",
    "NAMESPACE_ERR",
    "INVALID_ACCESS_ERR",
    ]

# Bind every name as a module-level integer constant
# (INDEX_SIZE_ERR = 1, DOMSTRING_SIZE_ERR = 2, ...).
for i in range(len(_CODE_NAMES)):
    globals()[_CODE_NAMES[i]] = i + 1
# Don't leave the loop variable behind as a module attribute.
del i
class DOMException(Exception):
    """Base class for exceptions raised by the DOM.

    Instantiating this class directly with an error code yields an
    instance of the specialized class registered for that code in
    ``g_realExceptions``.
    """

    def __init__(self, code, *args):
        # code: one of the integer error-code constants; it is used as
        # the key into g_errorMessages and g_realExceptions, which are
        # module globals looked up at call time.
        self.code = code
        self.args = (code,) + args
        # NOTE(review): Exception.__init__ is called after self.args is
        # assigned and may overwrite it with the message -- confirm
        # which value callers rely on.
        Exception.__init__(self, g_errorMessages[code])
        # Only instances created as the base class itself are
        # re-classed to the specialized exception type for the code.
        if self.__class__ is DOMException:
            self.__class__ = g_realExceptions[code]
def _derived_init(self, *args):
"""Initializer method that does not expect a code argument,
for use in derived classes."""
if not args:
args = (self, g_errorMessages[self.code])
else:
args = (self,) + args
apply(Exception.__init__, args)
# Prefer an already-available xml.dom.DOMException as the base class for
# the specialized exceptions generated below; otherwise the class
# defined in this module is used.
try:
    from xml.dom import DOMException
except ImportError:
    pass

import string

_EXCEPTION_NAMES = ["DOMException"]

# Source text for one derived exception class.  The class body has to be
# indented, otherwise the exec'd text is not valid Python.
template = """\
class %s(DOMException):
    code = %s
    __init__ = _derived_init
"""

# Maps numeric error code -> specialized exception class.
g_realExceptions = {}
for s in _CODE_NAMES:
    # "INDEX_SIZE_ERR" -> "IndexSizeErr"
    words = string.split(string.lower(s), "_")
    ename = string.join(map(string.capitalize, words), "")
    # Defines the class named `ename` in this module's namespace; the
    # template is only ever filled with the constant names listed in
    # _CODE_NAMES, never with external input.
    exec(template % (ename, s))
    g_realExceptions[globals()[s]] = globals()[ename]
    _EXCEPTION_NAMES.append(ename)
# Remove the scratch names (and the string module) from the module
# namespace again.
del s, words, ename, string, template
# Make sure the exception names are reachable through xml.dom.
#
# This module refers to itself through sys.modules[__name__] rather than
# importing itself by file name, and takes Node from the lower-cased
# `core` module: the old `Exceptions` / `Core` module names no longer
# exist in this package.
try:
    import xml.dom
except ImportError:
    # We have to define everything, which we've done above.
    # This installs it:
    import sys
    try:
        mod = __import__("xml")
    except ImportError:
        # No xml package at all: create an empty one.
        import new
        mod = new.module("xml")
        del new
        sys.modules["xml"] = mod
    _this_module = sys.modules[__name__]
    mod.dom = _this_module
    sys.modules["xml.dom"] = _this_module
    del mod, sys, _this_module
    # This module now stands in for xml.dom, so it must offer Node too.
    from core import Node
else:
    # The exception classes may not have been defined, so add any
    # that are needed.
    import sys
    _this_module = sys.modules[__name__]
    for s in _CODE_NAMES + _EXCEPTION_NAMES:
        if not hasattr(xml.dom, s):
            setattr(xml.dom, s, getattr(_this_module, s))
    # type(sys) is the module type: xml.dom.Node being a module (rather
    # than a class) is treated the same as it being absent.
    if not hasattr(xml.dom, "Node") or type(xml.dom.Node) is type(sys):
        # We need to provide the Node class so the .nodeType constants
        # are in the right place.
        import core
        xml.dom.Node = core.Node
        del core
    del s, sys, _this_module
del _CODE_NAMES, _EXCEPTION_NAMES
# Default message for each DOM error code, used when an exception is
# created without an explicit message.  Codes mapped to "" have no
# default text.
g_errorMessages = {
    INDEX_SIZE_ERR:
    "Index error accessing NodeList or NamedNodeMap",
    DOMSTRING_SIZE_ERR:
    "DOMString exceeds maximum size.",
    HIERARCHY_REQUEST_ERR:
    "Node manipulation results in invalid parent/child relationship.",
    WRONG_DOCUMENT_ERR: "",
    INVALID_CHARACTER_ERR: "",
    NO_DATA_ALLOWED_ERR: "",
    NO_MODIFICATION_ALLOWED_ERR:
    "Attempt to modify a read-only attribute.",
    NOT_FOUND_ERR: "",
    NOT_SUPPORTED_ERR:
    "DOM feature not supported.",
    INUSE_ATTRIBUTE_ERR:
    "Illegal operation on an attribute while in use by an element.",
    INVALID_STATE_ERR: "",
    SYNTAX_ERR: "",
    INVALID_MODIFICATION_ERR: "",
    NAMESPACE_ERR:
    "Namespace operation results in malformed or invalid name or name declaration.",
    INVALID_ACCESS_ERR: "",
    }
# To be sure that unused alternate implementations of the DOM
# exceptions are not used by accessing this module directly, import
# the "right" versions over those defined here. They may be the same,
# and they may be from an up-to-date PyXML or Python 2.1 or newer.
# This causes alternate implementations to be discarded if not needed.
from xml.dom import *
=== Added File zopeproducts/xmldom/expatbuilder.py === (767/867 lines abridged)
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Facility to use the Expat parser to load a ParsedXML.DOM instance
from a string or file."""
# Warning!
#
# This module is tightly bound to the implementation details of the
# Parsed XML DOM and can't be used with other DOM implementations. This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance. The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
# - .character_data_handler() has an extra case in which continuing
# data is appended to an existing Text node; this can be a
# substantial speedup since Expat seems to break data at every
# newline.
#
# - Determining that a node exists is done using an identity comparison
# with None rather than a truth test; this avoids searching for and
# calling any methods on the node object if it exists. (A rather
# nice speedup is achieved this way as well!)
import string
import core
import xmlextended
from xml.parsers import expat
class Options:
"""Features object that has variables set for each DOMBuilder feature.
[-=- -=- -=- 767 lines omitted -=- -=- -=-]
def default_handler(self, s):
    """Collect the data chunk ``s`` by appending it to ``self.subset``."""
    collected = self.subset
    collected.append(s)
def parse(file, namespaces=1):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name or an open file object.  A file
    opened here from a name is closed again even when parsing fails; a
    file object passed in by the caller is left open.

    'namespaces' selects the namespace-aware builder when true.
    """
    if namespaces:
        builder = ExpatBuilderNS()
    else:
        builder = ExpatBuilder()

    if not isinstance(file, type('')):
        return builder.parseFile(file)

    fp = open(file, 'rb')
    try:
        return builder.parseFile(fp)
    finally:
        # Runs on success and on error, so the handle never leaks.
        fp.close()
def parseFragment(file, context, namespaces=1):
    """Parse a fragment of a document, given the context from which it was
    originally extracted.  context should be the parent of the node(s) which
    are in the fragment.

    'file' may be either a file name or an open file object.  A file
    opened here from a name is closed again even when parsing fails; a
    file object passed in by the caller is left open.

    'namespaces' selects the namespace-aware fragment builder when true.
    """
    if namespaces:
        builder = FragmentBuilderNS(context)
    else:
        builder = FragmentBuilder(context)

    if not isinstance(file, type('')):
        return builder.parseFile(file)

    fp = open(file, 'rb')
    try:
        return builder.parseFile(fp)
    finally:
        # Runs on success and on error, so the handle never leaks.
        fp.close()
def makeBuilder(options):
    """Return a builder configured from an Options object.

    The namespace-aware builder is chosen when ``options.namespaces``
    is true, the plain one otherwise.
    """
    if not options.namespaces:
        return ExpatBuilder(options)
    return ExpatBuilderNS(options)
=== Added File zopeproducts/xmldom/loadsave.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Implementation of the DOM Level 3 'Load' feature."""
import core
import expatbuilder
import copy
import string
import xml.dom
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
class DOMBuilder:
    """Configurable entry point for loading a document into a DOM.

    Holds an options object plus an optional entity resolver, error
    handler and filter, and hands a copy of the options to the Expat
    based builder when asked to parse.
    """

    _entityResolver = None
    _errorHandler = None
    _filter = None

    def __init__(self):
        # The module is imported as `expatbuilder` (lower case).
        self._options = expatbuilder.Options()

    def _get_entityResolver(self):
        return self._entityResolver
    def _set_entityResolver(self, entityResolver):
        # Actually store the resolver (the assignment used to be missing).
        self._entityResolver = entityResolver
    entityResolver = property(_get_entityResolver, _set_entityResolver)

    def _get_errorHandler(self):
        return self._errorHandler
    def _set_errorHandler(self, errorHandler):
        self._errorHandler = errorHandler
    errorHandler = property(_get_errorHandler, _set_errorHandler)

    def _get_filter(self):
        return self._filter
    def _set_filter(self, filter):
        self._filter = filter
    filter = property(_get_filter, _set_filter)

    def setFeature(self, name, state):
        """Switch feature `name` on (true `state`) or off (false `state`).

        Raises xml.dom.NotFoundErr for a feature the options object does
        not know, and xml.dom.NotSupportedErr when the feature is known
        but cannot be put into the requested state.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        # Normalize the state to 0/1 exactly as canSetFeature() does, so
        # the two methods agree for any truth value.
        key = (_name_xform(name), state and 1 or 0)
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: " + repr(name))
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if the options object has an attribute for `name`."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if feature `name` can be put into `state`."""
        key = (_name_xform(name), state and 1 or 0)
        return key in self._settings

    # (option name, state) -> list of (option attribute, value) pairs to
    # set on the options object.  A missing key means that state is not
    # supported for that feature.
    _settings = {
        ("namespaces", 0): [("namespaces", 0)],
        ("namespaces", 1): [("namespaces", 1)],
        ("namespace_declarations", 0): [("namespace_declarations", 0)],
        ("namespace_declarations", 1): [("namespace_declarations", 1)],
        ("validation", 0): [("validation", 0)],
        ("external_general_entities", 0): [("external_general_entities", 0)],
        ("external_general_entities", 1): [("external_general_entities", 1)],
        ("external_parameter_entities", 0): [("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [("external_parameter_entities", 1)],
        ("validate_if_cm", 0): [("validate_if_cm", 0)],
        ("create_entity_ref_nodes", 0): [("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [("create_entity_ref_nodes", 1)],
        ("entity_nodes", 0): [("create_entity_ref_nodes", 0),
                              ("entity_nodes", 0)],
        ("entity_nodes", 1): [("entity_nodes", 1)],
        ("white_space_in_element_content", 0):
            [("white_space_in_element_content", 0)],
        ("white_space_in_element_content", 1):
            [("white_space_in_element_content", 1)],
        ("cdata_nodes", 0): [("cdata_nodes", 0)],
        ("cdata_nodes", 1): [("cdata_nodes", 1)],
        ("comments", 0): [("comments", 0)],
        ("comments", 1): [("comments", 1)],
        ("charset_overrides_xml_encoding", 0):
            [("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1):
            [("charset_overrides_xml_encoding", 1)],
    }

    def getFeature(self, name):
        """Return the current value of feature `name`.

        Raises xml.dom.NotFoundErr when the options object has no such
        attribute.
        """
        try:
            return getattr(self._options, _name_xform(name))
        except AttributeError:
            raise xml.dom.NotFoundErr()

    def parseURI(self, uri):
        """Resolve `uri` to an input source and parse it.

        Uses the configured entity resolver, or a default
        DOMEntityResolver when none is set.
        """
        if self.entityResolver:
            input = self.entityResolver.resolveEntity(None, uri)
        else:
            input = DOMEntityResolver().resolveEntity(None, uri)
        return self.parseDOMInputSource(input)

    def parseDOMInputSource(self, input):
        """Parse the document described by `input` and return the result."""
        # Work on a copy so per-parse additions don't leak into the
        # builder's own options.
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system id lives on the input source, not on the options.
        if fp is None and input.systemId:
            import urllib
            fp = urllib.urlopen(input.systemId)
        builder = expatbuilder.makeBuilder(options)
        return builder.parseFile(fp)
class DOMEntityResolver(core.DOMImplementation):
    """Default resolver: turns a public/system id pair into an input source."""

    def resolveEntity(self, publicId, systemId):
        """Return a DOMInputSource for the entity.

        When `systemId` is non-empty it is opened with urllib and the
        resulting stream is stored as the source's byteStream.
        """
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        if systemId:
            import urllib
            # The stream belongs to the returned source, not to the
            # resolver: it used to be stored on `self`, so the source
            # never received it.
            source.byteStream = urllib.urlopen(systemId)
            # Should parse out the content-type: header to
            # get charset information so that we can set the
            # encoding attribute on the DOMInputSource.
        return source
class DOMInputSource:
    """Description of where a document's data comes from.

    All five attributes default to None until set.
    """

    # Defaults live on the private names the accessors read.  Defaults
    # placed on the public names would be replaced by the property
    # objects defined below, and reading an attribute that was never
    # set would then raise AttributeError.
    _byteStream = None
    _characterStream = None
    _encoding = None
    _publicId = None
    _systemId = None

    def _get_byteStream(self):
        return self._byteStream
    def _set_byteStream(self, byteStream):
        self._byteStream = byteStream
    byteStream = property(_get_byteStream, _set_byteStream)

    def _get_characterStream(self):
        return self._characterStream
    def _set_characterStream(self, characterStream):
        self._characterStream = characterStream
    characterStream = property(_get_characterStream, _set_characterStream)

    def _get_encoding(self):
        return self._encoding
    def _set_encoding(self, encoding):
        self._encoding = encoding
    encoding = property(_get_encoding, _set_encoding)

    def _get_publicId(self):
        return self._publicId
    def _set_publicId(self, publicId):
        self._publicId = publicId
    publicId = property(_get_publicId, _set_publicId)

    def _get_systemId(self):
        return self._systemId
    def _set_systemId(self, systemId):
        self._systemId = systemId
    systemId = property(_get_systemId, _set_systemId)
class DOMBuilderFilter:
    """Element filter which can be used to tailor construction of
    a DOM instance.

    This base implementation does nothing beyond returning 1 for every
    element.
    """

    # There's really no need for this class; concrete implementations
    # should just implement the endElement() method as appropriate.

    def endElement(self, element):
        """Called with a finished element; always returns 1 here."""
        # Why this method is supposed to return anything at all
        # is a mystery; the result doesn't appear to be used.
        result = 1
        return result
def _name_xform(name):
return string.replace(string.lower(name), '-', '_')
=== Added File zopeproducts/xmldom/saxbuilder.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Constructor for ParsedXML.DOM, based on a SAX parser."""
import os
import urllib
import xml.sax
class SAXBuilder(xml.sax.ContentHandler):
    """SAX content handler that builds a DOM document from parser events.

    `documentFactory` must offer createDocument(namespaceURI,
    qualifiedName, doctype); the document it returns is used to create
    all further nodes.  After parsing, the result is in `self.document`.
    """

    _locator = None
    document = None
    documentElement = None

    # The 'xml' prefix is bound to this URI by definition and is never
    # announced through startPrefixMapping().
    _XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    def __init__(self, documentFactory=None):
        self.documentFactory = documentFactory
        # Stack of uri -> prefix dicts.  Seeded with the implicit 'xml'
        # binding so attributes such as xml:lang get a proper qualified
        # name instead of failing the prefix lookup.
        self._ns_contexts = [{self._XML_NAMESPACE: 'xml'}]
        self._current_context = self._ns_contexts[-1]

    def install(self, parser):
        """Register this builder as `parser`'s content handler."""
        parser.setContentHandler(self)

    def setDocumentLocator(self, locator):
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the current mapping, then record the new
        # binding; endPrefixMapping() restores the snapshot.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts.pop()

    def _make_qname(self, uri, localname, tagname):
        """Return a qualified name for (uri, localname).

        `tagname` is the name supplied by the reader, or None.
        """
        # When using namespaces, the reader may or may not
        # provide us with the original name.  If not, create
        # *a* valid tagName from the current context.
        if uri:
            if tagname is None:
                prefix = self._current_context.get(uri)
                if prefix:
                    tagname = "%s:%s" % (prefix, localname)
                else:
                    tagname = localname
        else:
            tagname = localname
        return tagname

    def startElementNS(self, name, tagName, attrs):
        uri, localname = name
        tagName = self._make_qname(uri, localname, tagName)
        # Identity test: never rely on the truth value of a Document.
        if self.document is None:
            factory = self.documentFactory
            self.document = factory.createDocument(uri or None, tagName, None)
            node = self.document.documentElement
        else:
            if uri:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.document.createElement(localname)
            self.curNode.appendChild(node)
        self.curNode = node

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                # Same rule as for element names: use the recorded
                # prefix if there is one, else the bare local name.
                # This avoids a KeyError for an unrecorded URI and a
                # literal "None:" prefix for the default namespace.
                qname = self._make_qname(a_uri, a_localname, None)
                node.setAttributeNS(a_uri, qname, value)
            else:
                node.setAttribute(a_localname, value)

    def endElementNS(self, name, tagName):
        self.curNode = self.curNode.parentNode

    def startElement(self, name, attrs):
        if self.documentElement is None:
            factory = self.documentFactory
            self.document = factory.createDocument(None, name, None)
            node = self.document.documentElement
            # Used purely as a "root element has been created" flag.
            self.documentElement = 1
        else:
            node = self.document.createElement(name)
            self.curNode.appendChild(node)
        self.curNode = node

        for aname, value in attrs.items():
            node.setAttribute(aname, value)

    def endElement(self, name):
        self.curNode = self.curNode.parentNode

    def comment(self, s):
        node = self.document.createComment(s)
        self.curNode.appendChild(node)

    def processingInstruction(self, target, data):
        node = self.document.createProcessingInstruction(target, data)
        self.curNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)

    def characters(self, chars):
        node = self.document.createTextNode(chars)
        self.curNode.appendChild(node)
def parse(file, namespaces=1, dom=None, parser=None):
if not parser:
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, namespaces)
if not dom:
import Core
dom = Core.theDOMImplementation
if isinstance(file, type('')):
try:
fp = open(file)
except IOError, e:
if e.errno != errno.ENOENT:
raise
fp = urllib.urlopen(file)
systemId = file
else:
# Ugh! Why doesn't urllib.pathname2url() do something useful?
systemId = "file://" + os.path.abspath(file)
else:
source = xml.sax.InputSource()
fp = file
try:
systemId = file.name
except AttributeError:
systemId = None
source = xml.sax.InputSource(file)
source.setByteStream(fp)
source.setSystemId(systemId)
builder = SAXBuilder(documentFactory=dom)
builder.install(parser)
parser.parse(source)
if fp is not file:
fp.close()
return builder.document
=== Added File zopeproducts/xmldom/traversal.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Implementation of DOM Level 2 Traversal.
Based on the W3C recommendation at:
http://www.w3.org/TR/DOM-Level-2-Traversal-Range/
"""
# This code could be sped up.
# - uses DOM methods, could use implementation internals, esp. childNodes
# - if we had mutation events, NodeIterator could build an array as it
# iterated, and move over that, only updating on mutation
import xml.dom
__all__ = [
"NodeFilter",
"NodeIterator",
"TreeWalker",
]
class NodeFilter:
    """Constants for traversal filtering plus an accept-everything filter."""

    # Verdicts acceptNode() may return:
    FILTER_ACCEPT = 1
    FILTER_REJECT = 2
    FILTER_SKIP = 3

    # whatToShow flags, one bit per node kind:
    SHOW_ALL = 0xFFFFFFFF
    SHOW_ELEMENT = 1 << 0
    SHOW_ATTRIBUTE = 1 << 1
    SHOW_TEXT = 1 << 2
    SHOW_CDATA_SECTION = 1 << 3
    SHOW_ENTITY_REFERENCE = 1 << 4
    SHOW_ENTITY = 1 << 5
    SHOW_PROCESSING_INSTRUCTION = 1 << 6
    SHOW_COMMENT = 1 << 7
    SHOW_DOCUMENT = 1 << 8
    SHOW_DOCUMENT_TYPE = 1 << 9
    SHOW_DOCUMENT_FRAGMENT = 1 << 10
    SHOW_NOTATION = 1 << 11

    def acceptNode(self, node):
        """Return the verdict for `node`; this default accepts every node."""
        verdict = NodeFilter.FILTER_ACCEPT
        return verdict
# Pairs each nodeType constant with the whatToShow flag that makes nodes
# of that type visible.
_whatToShow_bits = (
    (xml.dom.Node.ELEMENT_NODE, NodeFilter.SHOW_ELEMENT),
    (xml.dom.Node.ATTRIBUTE_NODE, NodeFilter.SHOW_ATTRIBUTE),
    (xml.dom.Node.TEXT_NODE, NodeFilter.SHOW_TEXT),
    (xml.dom.Node.CDATA_SECTION_NODE, NodeFilter.SHOW_CDATA_SECTION),
    (xml.dom.Node.ENTITY_REFERENCE_NODE, NodeFilter.SHOW_ENTITY_REFERENCE),
    (xml.dom.Node.ENTITY_NODE, NodeFilter.SHOW_ENTITY),
    (xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
     NodeFilter.SHOW_PROCESSING_INSTRUCTION),
    (xml.dom.Node.COMMENT_NODE, NodeFilter.SHOW_COMMENT),
    (xml.dom.Node.DOCUMENT_NODE, NodeFilter.SHOW_DOCUMENT),
    (xml.dom.Node.DOCUMENT_TYPE_NODE, NodeFilter.SHOW_DOCUMENT_TYPE),
    (xml.dom.Node.DOCUMENT_FRAGMENT_NODE, NodeFilter.SHOW_DOCUMENT_FRAGMENT),
    (xml.dom.Node.NOTATION_NODE, NodeFilter.SHOW_NOTATION),
    )
class AccessorBase:
def __init__(self, root, whatToShow, filter, entityReferenceExpansion):
if root is None:
raise xml.dom.NotSupportedErr(
"root of traversal object can't be None")
d = self.__dict__
d['root'] = root
d['whatToShow'] = whatToShow
d['filter'] = filter
d['expandEntityReferences'] = entityReferenceExpansion
#
# Decode the whatToShow flags for faster tests; the W3C
# reserves the first 200 NodeType values, but the whatToShow
# flags have to fit in 32 bits (leave slot 0 empty since it's
# not a valid NodeType).
#
d['_whatToShow'] = what = [0] * 33
for nodeType, bit in _whatToShow_bits:
what[nodeType] = whatToShow & bit
def __setattr__(self, name, value):
setter = getattr(self, '_set_' + name, None)
if setter is None:
getter = getattr(self, '_get_' + name, None)
if getter:
raise xml.dom.NoModificationAllowedErr(
"read-only attribute: " + `name`)
else:
raise AttributeError, "no such attribute: " + `name`
setter(value)
def _get_root(self):
return self.root
def _get_whatToShow(self):
return self.whatToShow
def _get_filter(self):
return filter
def _get_expandEntityReferences(self):
return self.expandEntityReferences
def _should_show(self, node):
if not self._whatToShow[node.nodeType]:
return NodeFilter.FILTER_SKIP
else:
if ( node.nodeType == xml.dom.Node.ENTITY_REFERENCE_NODE
and not self.expandEntityReferences):
return NodeFilter.FILTER_REJECT
elif self.filter is not None:
return self._filterNode(node)
return NodeFilter.FILTER_ACCEPT
def _nextInTree(self, node):
"""Return first visible node in node's subtree, or None."""
# check given node first
if self._should_show(node) == NodeFilter.FILTER_ACCEPT:
return node
elif self._should_show(node) == NodeFilter.FILTER_REJECT:
return None
for c in node.childNodes:
child = self._nextInTree(c)
if child:
return child
if c.isSameNode(self.root): # don't leave root subtree
return None
return None
def _lastInTree(self, node):
"""Return last visible node in node's subtree, or None."""
if self._should_show(node) == NodeFilter.FILTER_REJECT:
return None
childNodes = node.childNodes
childNums = range(childNodes.length)
childNums.reverse()
for c in childNums:
childNode = childNodes[c]
child = self._lastInTree(childNode)
if child:
return child
if childNode.isSameNode(self.root): # don't leave root subtree
return None
# subtree exhausted, check given node
if self._should_show(node) == NodeFilter.FILTER_ACCEPT:
return node
return None
# we don't do any visibility tests here, _nextInTree does.
def _nextNode(self, startNode):
    """Return next visible node after startNode, or None.

    Search order: startNode's children, then startNode's following
    siblings, then the following siblings of each ancestor in turn.
    Ancestors themselves are never returned.  The climb stops as soon
    as it reaches the root, so siblings of the root are not visited.
    """
    # check children
    for child in startNode.childNodes:
        node = self._nextInTree(child)
        if node:
            return node
        if child.isSameNode(self.root):  # don't leave root subtree
            return None
    # Climb from startNode towards the root, scanning following
    # siblings at each level.  The root test comes *before* the
    # sibling scan: the earlier code tested it afterwards (and never
    # for startNode itself), so it could return siblings of the root.
    current = startNode
    while current:
        if current.isSameNode(self.root):
            # don't leave root subtree
            return None
        sib = current.nextSibling
        while sib:
            node = self._nextInTree(sib)
            if node:
                return node
            if sib.isSameNode(self.root):  # don't leave root subtree
                return None
            sib = sib.nextSibling
        # no visible nodes in siblings or subtrees at this level
        current = current.parentNode
    return None
# we *do* a visibility test here, _lastInTree does too.
def _previousNode(self, startNode):
    """Return the previous visible node before startNode, or None.

    Search order: the last visible node in each preceding sibling's
    subtree, then the parent if it is visible, then the same again one
    level up.  The climb stops as soon as it reaches the root, so
    siblings of the root are not visited.
    """
    current = startNode
    while 1:
        # The root test comes *before* the sibling scan: the earlier
        # code tested it afterwards (and never for startNode itself),
        # so it could return preceding siblings of the root.
        if current.isSameNode(self.root):
            # don't leave root subtree
            return None
        # check previous siblings at this level
        sib = current.previousSibling
        while sib:
            node = self._lastInTree(sib)
            if node:
                return node
            if sib.isSameNode(self.root):  # don't leave root subtree
                return None
            sib = sib.previousSibling
        # step up; an ancestor is itself a candidate going backwards
        current = current.parentNode
        if not current:
            return None
        if self._should_show(current) == NodeFilter.FILTER_ACCEPT:
            return current
# Since we don't need to know about structure, we could probably be a lot
# faster if we kept a list of nodes in document order and updated
# it when we got a mutation event - once we have mutation events.
class NodeIterator(AccessorBase):
    """Iterator over the visible nodes under a root node.

    The position is kept as a reference node (``_refNode``) plus a flag
    (``_refPos``) that records the direction in which the reference
    node was last crossed.  Both are written through ``__dict__``
    rather than by plain attribute assignment.
    """

    BEFORE_NODE = 1 # iterator crossed reference node moving forward
    AFTER_NODE = 0 # iterator crossed reference node moving backward

    def __init__(self, root, whatToShow=NodeFilter.SHOW_ALL, filter=None,
                 entityReferenceExpansion=1):
        AccessorBase.__init__(self, root, whatToShow,
                              filter, entityReferenceExpansion)
        self.__dict__['_refNode'] = None
        self.__dict__['_refPos'] = NodeIterator.BEFORE_NODE

    def detach(self):
        """Detach the iterator; later nextNode/previousNode calls raise."""
        self.__dict__['root'] = None

    def nextNode(self):
        """Return the next visible node and advance, or return None.

        Raises xml.dom.InvalidStateErr if the iterator was detached.
        """
        if self.root is None:
            raise xml.dom.InvalidStateErr(
                "can't iterate using a detached NodeIterator")
        # identity test: None is a sentinel here, not a value to compare
        if self._refNode is None:
            # first move: the root itself is the first candidate
            self.__dict__['_refNode'] = self.root
            self.__dict__['_refPos'] = NodeIterator.AFTER_NODE
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                return self._refNode
        elif self._refPos == NodeIterator.BEFORE_NODE:
            # direction changed: hand back the reference node itself
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                self.__dict__['_refPos'] = NodeIterator.AFTER_NODE
                return self._refNode
        node = AccessorBase._nextNode(self, self._refNode)
        if node:
            self.__dict__['_refNode'] = node
            self.__dict__['_refPos'] = NodeIterator.AFTER_NODE
        return node

    def previousNode(self):
        """Return the previous visible node and move back, or return None.

        Raises xml.dom.InvalidStateErr if the iterator was detached.
        """
        if self.root is None:
            raise xml.dom.InvalidStateErr(
                "can't iterate using a detached NodeIterator")
        if self._refNode is None:
            self.__dict__['_refNode'] = self.root
            self.__dict__['_refPos'] = NodeIterator.BEFORE_NODE
        elif self._refPos == NodeIterator.AFTER_NODE:
            # direction changed: hand back the reference node itself
            if self._should_show(self._refNode) == NodeFilter.FILTER_ACCEPT:
                self.__dict__['_refPos'] = NodeIterator.BEFORE_NODE
                return self._refNode
        node = AccessorBase._previousNode(self, self._refNode)
        if node:
            self.__dict__['_refNode'] = node
            self.__dict__['_refPos'] = NodeIterator.BEFORE_NODE
        return node

    def __getitem__(self, index):
        """Return nextNode(), raising IndexError when it yields None.

        The index argument is ignored; every call simply advances.
        """
        node = self.nextNode()
        if node is None:
            raise IndexError("NodeIterator index out of range")
        return node

    def _filterNode(self, node):
        """Return what the filter says to do with node,
        translating reject into skip"""
        filterAction = self.filter.acceptNode(node)
        if filterAction == NodeFilter.FILTER_REJECT:
            return NodeFilter.FILTER_SKIP
        return filterAction
class TreeWalker(AccessorBase):
    """Tree-shaped navigation over the visible nodes around ``currentNode``.

    Each movement method returns the node it moved to, or None when no
    visible node was found; in the latter case ``currentNode`` is left
    unchanged.
    """

    def __init__(self, root, whatToShow=NodeFilter.SHOW_ALL, filter=None,
                 entityReferenceExpansion=1):
        AccessorBase.__init__(self, root, whatToShow,
                              filter, entityReferenceExpansion)
        self.__dict__['currentNode'] = root

    def _get_currentNode(self):
        return self.currentNode

    def _set_currentNode(self, node):
        """Set the current node; None raises xml.dom.NotSupportedErr."""
        if node is None:
            raise xml.dom.NotSupportedErr("can't set current node to None")
        self.__dict__['currentNode'] = node

    def _moveToAccepted(self, candidate, step):
        """Advance candidate along attribute ``step`` until accepted.

        Starting at candidate, follow the attribute named by step until
        a node whose verdict is FILTER_ACCEPT is found or the chain
        ends.  An accepted node becomes the current node.  Returns the
        accepted node or None.
        """
        while candidate is not None:
            if self._should_show(candidate) == NodeFilter.FILTER_ACCEPT:
                break
            candidate = getattr(candidate, step)
        if candidate is not None:
            self.__dict__['currentNode'] = candidate
        return candidate

    def parentNode(self):
        """Move to the nearest visible ancestor, not going above root."""
        if self.root.isSameNode(self.currentNode):
            return None
        candidate = self.currentNode.parentNode
        while candidate is not None:
            if self._should_show(candidate) == NodeFilter.FILTER_ACCEPT:
                break
            if candidate.isSameNode(self.root):
                # can't step any further up
                return None
            candidate = candidate.parentNode
        if candidate is not None:
            self.__dict__['currentNode'] = candidate
        return candidate

    def firstChild(self):
        """Move to the first accepted child of the current node."""
        return self._moveToAccepted(self.currentNode.firstChild,
                                    'nextSibling')

    def lastChild(self):
        """Move to the last accepted child of the current node."""
        return self._moveToAccepted(self.currentNode.lastChild,
                                    'previousSibling')

    # the rec doesn't say that *Sibling should pay attention to root!
    def previousSibling(self):
        """Move to the nearest accepted preceding sibling."""
        return self._moveToAccepted(self.currentNode.previousSibling,
                                    'previousSibling')

    def nextSibling(self):
        """Move to the nearest accepted following sibling."""
        return self._moveToAccepted(self.currentNode.nextSibling,
                                    'nextSibling')

    # TreeWalkers don't move if there is no visible next or previous,
    # so we do nothing for a None return.
    def nextNode(self):
        """Move to the result of AccessorBase._nextNode, if any."""
        found = AccessorBase._nextNode(self, self.currentNode)
        if found:
            self.__dict__['currentNode'] = found
        return found

    def previousNode(self):
        """Move to the result of AccessorBase._previousNode, if any."""
        found = AccessorBase._previousNode(self, self.currentNode)
        if found:
            self.__dict__['currentNode'] = found
        return found

    def _filterNode(self, node):
        """Return what the filter says to do with node."""
        return self.filter.acceptNode(node)
=== Added File zopeproducts/xmldom/xmlextended.py ===
##############################################################################
#
# Copyright (c) 2001-2003 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Acquisition-based implementation of the DOM 'XML' feature classes."""
import core
from core import DOMProperty
import xml.dom
from zope.app.context import ContextWrapper
class CDATASection(core.Text):
    """core.Text subclass carrying the CDATA section node type and name."""

    _nodeType = core.Node.CDATA_SECTION_NODE
    _nodeName = "#cdata-section"
class Identified:
    """Mix-in class that supports the publicId and systemId attributes.

    Subclasses call ``_identified_mixin_init`` from their own
    ``__init__`` to store the two identifiers.
    """

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers on the instance."""
        self._systemId = systemId
        self._publicId = publicId

    def _get_publicId(self):
        return self._publicId

    def _get_systemId(self):
        return self._systemId

    # No setters are supplied for either identifier.
    publicId = DOMProperty(_get_publicId)
    systemId = DOMProperty(_get_systemId)
class Entity(Identified, core.Parentless, core.Node):
    """Entity node with a name, public/system identifiers and notation name.

    The DOM Level 3 draft attributes ``actualEncoding``, ``encoding``
    and ``version`` are plain read/write properties backed by
    ``_actualEncoding``, ``_encoding`` and ``_version``.
    """

    _nodeType = core.Node.ENTITY_NODE
    _readonly = 1
    _in_tree = 0
    _allowed_child_types = (core.Node.ELEMENT_NODE,
                            core.Node.PROCESSING_INSTRUCTION_NODE,
                            core.Node.COMMENT_NODE,
                            core.Node.TEXT_NODE,
                            core.Node.CDATA_SECTION_NODE,
                            core.Node.ENTITY_REFERENCE_NODE)

    def __init__(self, name, publicId, systemId, notationName):
        self._identified_mixin_init(publicId, systemId)
        self._nodeName = name
        self._notationName = notationName

    def _cloneNode(self, deep, mutable, document):
        # force children to not to acquire mutability:
        return core.Node._cloneNode(self, deep, 0, document)

    def _get_notationName(self):
        return self._notationName
    notationName = DOMProperty(_get_notationName)

    # DOM Level 3 (Working Draft, 01 Sep 2000)
    # I expect some or all of these will become read-only before the
    # recommendation is finished.
    _actualEncoding = None
    _encoding = None
    _version = None

    def _get_actualEncoding(self):
        return self._actualEncoding
    def _set_actualEncoding(self, value):
        self._actualEncoding = value
    actualEncoding = property(_get_actualEncoding, _set_actualEncoding)

    def _get_encoding(self):
        return self._encoding
    def _set_encoding(self, value):
        self._encoding = value # XXX
    encoding = property(_get_encoding, _set_encoding)

    def _get_version(self):
        # Read the backing attribute.  Returning ``self.version`` here
        # would re-enter this getter through the property and recurse
        # without end.
        return self._version
    def _set_version(self, value):
        # Write the same ``_version`` slot the getter reads.  A
        # double-underscore name would be mangled to
        # ``_Entity__version`` and never be seen by the getter.
        self._version = value
    version = property(_get_version, _set_version)
class EntityReference(core.Node):
    """Read-only entity reference node identified by its name."""

    _nodeType = core.Node.ENTITY_REFERENCE_NODE
    _readonly = 1
    # node types accepted as children of an entity reference
    _allowed_child_types = (
        core.Node.ELEMENT_NODE,
        core.Node.PROCESSING_INSTRUCTION_NODE,
        core.Node.COMMENT_NODE,
        core.Node.TEXT_NODE,
        core.Node.CDATA_SECTION_NODE,
        core.Node.ENTITY_REFERENCE_NODE,
    )

    def __init__(self, name):
        """Create a reference to the entity called name, not yet in a tree."""
        self._in_tree = 0
        self._nodeName = name
class Notation(Identified, core.Childless, core.Parentless, core.Node):
    """Read-only notation node with public and system identifiers."""

    _nodeType = core.Node.NOTATION_NODE
    _readonly = 1

    def __init__(self, name, publicId, systemId):
        """Store the identifiers and node name; the node starts outside a tree."""
        self._identified_mixin_init(publicId, systemId)
        self._in_tree = 0
        self._nodeName = name

    def _cloneNode(self, deep, mutable, document):
        # The mutable argument is ignored and 0 is passed instead, so
        # that clones do not acquire mutability.
        return core.Node._cloneNode(self, deep, 0, document)

    # DOM Level 3 (working draft, 5 June 2001)
    def _get_textContent(self):
        """Return the empty string."""
        return ''

    # plain class attribute holding the empty string
    textContent = ''
class ProcessingInstruction(core.Childless, core.Node):
    """Processing instruction node holding a target and its data.

    The data is stored twice, as ``_data`` and ``_nodeValue``; the
    ``data`` and ``nodeValue`` properties share one getter and one
    setter, which keeps both slots in step.
    """

    _nodeType = core.Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self._in_tree = 0
        self._target = target
        self._nodeName = target
        self._data = data
        self._nodeValue = data

    def _get_data(self):
        return self._data

    def _set_data(self, data):
        """Replace the data and call ``_changed()`` if it differs.

        Raises xml.dom.NoModificationAllowedErr on a read-only node.
        """
        if self._readonly:
            raise xml.dom.NoModificationAllowedErr()
        if self._data != data:
            self._nodeValue = data
            self._data = data
            self._changed()

    data = DOMProperty(_get_data, _set_data)

    # nodeValue is an alias of data
    _get_nodeValue = _get_data
    _set_nodeValue = _set_data
    nodeValue = DOMProperty(_get_nodeValue, _set_nodeValue)

    def _get_target(self):
        return self._target

    target = DOMProperty(_get_target)

    # DOM Level 3 (working draft, 5 June 2001)
    def _get_textContent(self):
        return self._nodeValue

    textContent = DOMProperty(_get_textContent)
class DocumentType(Identified, core.Childless, core.Node):
    """Document type node: a name, identifiers, entities and notations.

    Entities and notations are kept in the plain lists ``_entities``
    and ``_notations``; the ``entities`` and ``notations`` properties
    build a fresh OwnedEntityMap over the matching list on each access.
    """

    _nodeType = core.Node.DOCUMENT_TYPE_NODE
    _nodeValue = None
    _internalSubset = None

    def __init__(self, qualifiedName, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self._nodeName = qualifiedName
        self._name = qualifiedName
        self._notations = []
        self._entities = []
        self._in_tree = 0

    def _get_internalSubset(self):
        return self._internalSubset

    internalSubset = DOMProperty(_get_internalSubset)

    def _get_name(self):
        return self._name

    name = DOMProperty(_get_name)

    # nodeName is an alias of name
    _get_nodeName = _get_name
    nodeName = DOMProperty(_get_nodeName)

    def _get_nodeValue(self):
        """Always return None."""
        return None  # XXX ?

    def _set_nodeValue(self, data):
        """Ignore the assignment."""
        pass

    nodeValue = DOMProperty(_get_nodeValue, _set_nodeValue)

    def _get_entities(self):
        return OwnedEntityMap(self, '_entities')

    entities = DOMProperty(_get_entities)

    def _get_notations(self):
        return OwnedEntityMap(self, '_notations')

    notations = DOMProperty(_get_notations)

    def isSupported(self, feature, version):
        """Ask the owning document's implementation about a feature.

        When there is no owner document, core.theDOMImplementation is
        consulted instead.
        """
        owner = self.ownerDocument
        if not owner:
            implementation = core.theDOMImplementation
        else:
            implementation = owner.implementation
        return implementation.hasFeature(feature, version)

    # DOM Level 3 (working draft, 5 June 2001)
    def _get_textContent(self):
        """Return the empty string."""
        return ''

    # plain class attribute holding the empty string
    textContent = ''
class OwnedEntityMap(core.MapFromParent):
    """
    NamedNodeMap that works on the entity or notation structure
    of a DocumentType.

    The map is read-only: every mutation hook raises
    xml.dom.NoModificationAllowedErr.
    """

    def __init__(self, parent, listName):
        """Remember the parent and the name of the list being mapped."""
        core.MapFromParent.__init__(self, parent)
        self._parentListName = listName

    def _item_helper(self, itemSource):
        "Return itemSource wrapped in a ContextWrapper around the parent."
        # XXX is ownerDocument ok with this?
        #itemSource.__dict__['ownerDocument'] = self._parent
        wrapped = ContextWrapper(itemSource, self._parent)
        return wrapped

    def _nameMatcher(self, itemSource, name):
        "Return whether itemSource's nodeName equals name."
        return itemSource.nodeName == name

    def _nsMatcher(self, itemSource, namespaceURI, localName):
        "Return whether both namespaceURI and localName match itemSource."
        if itemSource.namespaceURI == namespaceURI:
            return itemSource.localName == localName
        return False

    # --- mutation hooks: all refuse, the map is read-only ---

    def _set_named_item(self, name, matcher, node):
        raise xml.dom.NoModificationAllowedErr()

    def _delFromParentList(self, entities, i):
        raise xml.dom.NoModificationAllowedErr()

    def _addToParentList(self, entities, node):
        raise xml.dom.NoModificationAllowedErr()

    def _key_helper(self, itemSource):
        "Given an item source, return an appropriate key for our mapping"
        if not itemSource.prefix:
            return itemSource.localName
        return "%s:%s" % (itemSource.prefix, itemSource.localName)