[Zope-CVS] CVS: Packages/HTMLStructure - Utility.py:1.1 Validator.py:1.3

Evan Simpson evan@zope.com
Fri, 1 Feb 2002 14:23:58 -0500


Update of /cvs-repository/Packages/HTMLStructure
In directory cvs.zope.org:/tmp/cvs-serv29282

Modified Files:
	Validator.py 
Added Files:
	Utility.py 
Log Message:
Include empty tags in match list.  Put nest_tags() in new Utility module.


=== Added File Packages/HTMLStructure/Utility.py ===
import Wrapper

def filterTagsByNS(pt, ns, matched):
    '''Keep only the matched tag pairs that involve a given namespace.

    pt is a ParsedText, ns is a namespace string, and matched is a
    list of (open, close) tag pairs.  A pair is kept when the name of
    its opening tag starts with "<ns>:", or when at least one of that
    tag's attribute names does.  Kept pairs are returned in a new
    list, in their original order; each pair appears at most once.
    '''

    cursor = Wrapper.WrapTags(pt)
    prefix = ns + ':'
    kept = []
    for pair in matched:
        # Position the wrapper on the first member of the pair (the
        # opening tag) so that its name and attributes can be examined.
        cursor.goto(pair[0])
        hit = cursor.tName().startswith(prefix)
        if not hit:
            # The tag itself is outside the namespace; look for the
            # first attribute that belongs to it.
            for attr in cursor.tAttrNames():
                hit = attr.startswith(prefix)
                if hit:
                    break
        if hit:
            kept.append(pair)
    return kept

def nest_tags(matched):
    '''Turn a flat list of (open, close) pairs into a nested tree.

    Each node of the result is a list whose first item is the span
    itself and whose remaining items are the nodes nested directly
    inside it.  The return value is the list of top-level nodes.
    A span is nested in the nearest preceding span whose close index
    is not less than the span's open index.
    '''
    roots = []
    enclosing = []
    for span in matched:
        node = [span]
        # Drop every candidate parent that closed before this span opens.
        while enclosing and span[0] > enclosing[-1][0][1]:
            enclosing.pop()
        if enclosing:
            # The innermost surviving candidate contains this span.
            enclosing[-1].append(node)
        else:
            # Nothing encloses this span, so it is a top-level node.
            roots.append(node)
        enclosing.append(node)
    return roots



=== Packages/HTMLStructure/Validator.py 1.2 => 1.3 ===
         tags and matching them with opening tags.  When a pair of tags
         matches exactly, the 2-tuple (open, close) of the parse
-        indexes of the matched tags are added to the "matched" list.
+        indexes of the matched tags are added to the "matched"
+        list. Empty tags match themselves.
 
         The parse indexes of unmatched tags are returned in the
         "unclosed" and "unopened" lists.
@@ -48,7 +49,7 @@
         
         unop_names = []
 
-        # Scan non-empty tags in reverse order
+        # Scan tags in reverse order
         tags = Wrapper.WrapTags(self.pt)
         while tags.prev():
             tag = tags.tag
@@ -56,8 +57,10 @@
             ttype = tags.tType(self.is_empty)
             i = tags.index
 
-            if not ttype or ttype == 'empty':
+            if not ttype:
                 pass
+            elif ttype == 'empty':
+                matched.append((i, i))
             elif ttype == 'close':
                 unopened.append(i)
                 unop_names.append(tname)
@@ -169,25 +172,6 @@
         elif mtag is None or uidx < mtag[0]:
             self.scan = uidx + 1
             return uidx
-
-def nest_tags(matched):
-    '''Create a nested tag data structure from a list of pairs.'''
-    tags = []
-    stack = []
-    for span in matched:
-        tag = [span]
-        # Pop tags off of the stack that we're to the right of.
-        while stack:
-            top = stack[-1]
-            if span[0] > top[0][1]:
-                stack.pop()
-            else:
-                top.append(tag)
-                break
-        else:
-            tags.append(tag)
-        stack.append(tag)
-    return tags
 
 def listTest(alist):
     m = {}