[Zope3-checkins] CVS: Zope3/src/zope/app/xml - w3cschemalocations.py:1.7

Fred L. Drake, Jr. fred@zope.com
Thu, 17 Jul 2003 10:54:51 -0400


Update of /cvs-repository/Zope3/src/zope/app/xml
In directory cvs.zope.org:/tmp/cvs-serv3256

Modified Files:
	w3cschemalocations.py 
Log Message:
W3CXMLSchemaLocationParser implementation changes:
- startElement(): simplify
- parse(): complexlify: don't pass the whole document to Expat if we
  don't need to; parse it in small chunks instead.  This allows
  faster exception propagation from the startElement() method once
  we've actually located the data we want.


=== Zope3/src/zope/app/xml/w3cschemalocations.py 1.6 => 1.7 ===
--- Zope3/src/zope/app/xml/w3cschemalocations.py:1.6	Tue Jul  1 10:31:05 2003
+++ Zope3/src/zope/app/xml/w3cschemalocations.py	Thu Jul 17 10:54:46 2003
@@ -55,7 +55,10 @@
 
 class W3CXMLSchemaLocationParser:
 
-    SCHEMA_INSTANCE_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance'
+    CHUNK_SIZE = 1024
+
+    SCHEMA_INSTANCE_NAME = (
+        'http://www.w3.org/2001/XMLSchema-instance schemaLocation')
 
     def __init__(self, xml):
         self._xml = xml
@@ -64,23 +67,25 @@
         self._parser.StartElementHandler = self.startElement
 
     def startElement(self, name, attrs):
-        for key, value in attrs.items():
-            try:
-                namespace_uri, name = key.split(' ')
-            except ValueError:
-                namespace_uri = None
-                name = key
-            if (namespace_uri == self.SCHEMA_INSTANCE_NAMESPACE and
-                name == 'schemaLocation'):
-                self._schema_uris = value.strip().split()
+        self._schema_uris = attrs.get(self.SCHEMA_INSTANCE_NAME, '').split()
         # abort parsing after the first element, which is the document element
-        # raising an error seems to be a legitimate way to do this
+        # raising an error is the best way to exit the parse
         raise DoneParsing
 
     def parse(self):
+        start = 0
         try:
-            self._parser.Parse(self._xml, True)
-        except ExpatError, e:
+            # Feed the document to Expat a little bit at a time; this
+            # allows a parse of a well-formed but large document to
+            # exit more quickly once the start tag for the document
+            # element has been found.
+            while 1:
+                text = self._xml[start:start + self.CHUNK_SIZE]
+                if not text:
+                    break
+                start += self.CHUNK_SIZE
+                self._parser.Parse(text, False)
+        except ExpatError:
             # we do not take any special pains to make sure this is
             # well-formed anyway; this should happen at a higher level
             # (views) or will be detected at a lower layer (parsing into