[Zope3-checkins] CVS: Zope3/src/zope/app/xml - w3cschemalocations.py:1.7
Fred L. Drake, Jr.
fred@zope.com
Thu, 17 Jul 2003 10:54:51 -0400
Update of /cvs-repository/Zope3/src/zope/app/xml
In directory cvs.zope.org:/tmp/cvs-serv3256
Modified Files:
w3cschemalocations.py
Log Message:
W3CXMLSchemaLocationParser implementation changes:
- startElement(): simplify
- parse(): complexlify: don't pass the whole document to Expat if we
don't need to; parse it in small chunks instead. This allows
faster exception propagation from the startElement() method once
we've actually located the data we want.
=== Zope3/src/zope/app/xml/w3cschemalocations.py 1.6 => 1.7 ===
--- Zope3/src/zope/app/xml/w3cschemalocations.py:1.6 Tue Jul 1 10:31:05 2003
+++ Zope3/src/zope/app/xml/w3cschemalocations.py Thu Jul 17 10:54:46 2003
@@ -55,7 +55,10 @@
class W3CXMLSchemaLocationParser:
- SCHEMA_INSTANCE_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance'
+ CHUNK_SIZE = 1024
+
+ SCHEMA_INSTANCE_NAME = (
+ 'http://www.w3.org/2001/XMLSchema-instance schemaLocation')
def __init__(self, xml):
self._xml = xml
@@ -64,23 +67,25 @@
self._parser.StartElementHandler = self.startElement
def startElement(self, name, attrs):
- for key, value in attrs.items():
- try:
- namespace_uri, name = key.split(' ')
- except ValueError:
- namespace_uri = None
- name = key
- if (namespace_uri == self.SCHEMA_INSTANCE_NAMESPACE and
- name == 'schemaLocation'):
- self._schema_uris = value.strip().split()
+ self._schema_uris = attrs.get(self.SCHEMA_INSTANCE_NAME, '').split()
# abort parsing after the first element, which is the document element
- # raising an error seems to be a legitimate way to do this
+ # raising an error is the best way to exit the parse
raise DoneParsing
def parse(self):
+ start = 0
try:
- self._parser.Parse(self._xml, True)
- except ExpatError, e:
+ # Feed the document to Expat a little bit at a time; this
+ # allows a parse of a well-formed but large document to
+ # exit more quickly once the start tag for the document
+ # element has been found.
+ while 1:
+ text = self._xml[start:start + self.CHUNK_SIZE]
+ if not text:
+ break
+ start += self.CHUNK_SIZE
+ self._parser.Parse(text, False)
+ except ExpatError:
# we do not take any special pains to make sure this is
# well-formed anyway; this should happen at a higher level
# (views) or will be detected at a lower layer (parsing into