[Zope-Checkins] CVS: Zope2 - StructuredText.py:1.34.8.3

Andreas Jung andreas@dhcp165.digicool.com
Mon, 23 Apr 2001 16:04:12 -0400


Update of /cvs-repository/Zope2/lib/python/StructuredText
In directory yetix:/work/sandboxes/ajung-2_4-ts_regex-exterminiation-branch/lib/python/StructuredText

Modified Files:
      Tag: ajung-2_4-ts_regex-exterminiation-branch
	StructuredText.py 
Log Message:
Cleanup of several suspicious regex usages



--- Updated File StructuredText.py in package Zope2 --
--- StructuredText.py	2001/04/19 12:49:53	1.34.8.2
+++ StructuredText.py	2001/04/23 20:04:11	1.34.8.3
@@ -205,11 +205,13 @@
 
 '''
 
-import ts_regex_old as ts_regex
-import  regex
+import ts_regex
+import regex
+from ts_regex import gsub
 from string import split, join, strip, find
-import re
+import string,re
 
+
 def untabify(aString,
              indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
              ):
@@ -306,8 +308,9 @@
     ROW=' <TR>\n%s </TR>\n'
     TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
     
-    def create(self,aPar,td=ts_regex.compile(
-        '[ \t\n]*||\([^\0|]*\)').match_group):
+    def create(self,aPar,
+        td_reg=re.compile(r'[ \t\n]*\|\|([^\0x00|]*)')
+        ):
         '''parses a table and returns nested list representing the
         table'''
         self.table=[]
@@ -315,11 +318,12 @@
         for line in text:
             row=[]
             while 1:
-                pos=td(line,(1,))
-                if not pos:return 0
-                row.append(pos[1])
-                if pos[0]==len(line):break
-                line=line[pos[0]:]
+                mo =  td_reg.match(line)
+                if not mo: return 0
+                pos = mo.end(1)
+                row.append(mo.group(1))
+                if pos==len(line):break
+                line=line[pos:]
             self.table.append(row)
         return 1
 
@@ -339,9 +343,6 @@
             htmltable.append(self.ROW%join(htmlrow,''))
         return self.TABLE%join(htmltable,'')
 
-optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
-trailing_space = '\([\0- ]\)'
-not_punctuation_or_whitespace = "[^-,.?:\0- ]"
 table=Table()
 
 class StructuredText:
@@ -364,25 +365,25 @@
           aStructuredString -- The string to be parsed.
           level -- The level of top level headings to be created.
         '''
+
+
+        pat = '\"([%s]*?)\":' % string.printable + \
+              '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \
+              '([.:?;] )' 
+
+        p_reg = re.compile(pat,re.M)
+                
+        aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString)
+
+        pat = '\"([%s]*?)\", ' % string.printable + \
+              '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \
+              '([.:?;] )' 
 
-        aStructuredString = re.sub(
-            '\"([^\"\0]+)\":'         # title: <"text":>
-            + ('([-:a-zA-Z0-9_,./?=@#~&]+%s)'
-               % not_punctuation_or_whitespace)
-            + optional_trailing_punctuation
-            + trailing_space,
-            '<a href="\\2">\\1</a>\\4\\5\\6',
-            aStructuredString)
-
-        aStructuredString = re.sub(
-            '\"([^\"\0]+)\",[\0- ]+'            # title: <"text", >
-            + ('([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~&]*%s)'
-               % not_punctuation_or_whitespace)
-            + optional_trailing_punctuation
-            + trailing_space,
-            '<a href="\\2">\\1</a>\\4\\5\\6',
-            aStructuredString)
+        p_reg = re.compile(pat,re.M)
 
+        aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString)
+
+
         protoless = find(aStructuredString, '<a href=":')
         if protoless != -1:
             aStructuredString = re.sub('<a href=":', '<a href="',
@@ -391,7 +392,6 @@
         self.level=level
         paragraphs=ts_regex.split(untabify(aStructuredString),
                                   paragraph_divider)
-
         paragraphs=map(indent_level,paragraphs)
 
         self.structure=structure(paragraphs)
@@ -401,11 +401,11 @@
         return str(self.structure)
 
 
-ctag_prefix='([\x00- \\(]|^)'   
-ctag_suffix='([\x00- ,.:;!?\\)]|$)' 
-ctag_middle='[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]'      
-ctag_middl2='[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'   
-        
+ctag_prefix=r'([\x00- \\(]|^)' 
+ctag_suffix=r'([\x00- ,.:;!?\\)]|$)'         
+ctag_middle=r'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]' 
+ctag_middl2=r'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'    
+
 def ctag(s,
          em=re.compile(
              ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
@@ -417,10 +417,10 @@
              ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
          ):
     if s is None: s=''
-    s=strong.sub('\\1<strong>\\2</strong>\\3',s)
-    s=under.sub('\\1<u>\\2</u>\\3',s)
-    s=code.sub( '\\1<code>\\2</code>\\3',s)
-    s=em.sub(   '\\1<em>\\2</em>\\3',s)
+    s=strong.sub(r'\1<strong>\2</strong>\3',s)
+    s=under.sub( r'\1<u>\2</u>\3',s)
+    s=code.sub(  r'\1<code>\2</code>\3',s)
+    s=em.sub(    r'\1<em>\2</em>\3',s)
     return s    
 
 class HTML(StructuredText):
@@ -439,9 +439,9 @@
 
         '''
         s=self._str(self.structure,self.level)
-        s=re.sub(extra_dl,'\n',s)
-        s=re.sub(extra_ul,'\n',s)
-        s=re.sub(extra_ol,'\n',s)
+        s=extra_dl.sub('\n',s)
+        s=extra_ul.sub('\n',s)
+        s=extra_ol.sub('\n',s)
         return s
 
     def ul(self, before, p, after):
@@ -486,53 +486,49 @@
     
     def _str(self,structure,level,
              # Static
-             bullet=re.compile('[ \t\n]*[o*-][ \t\n]+([^\0]*)'
-                                     ),
-             example=re.compile('[\0- ]examples?:[\0- ]*$'
+             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
+                                     ).match_group,
+             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
                                       ).search,
-             dl=re.compile('([^\n]+)[ \t]+--[ \t\n]+([^\0]*)'
-                                 ),
-             nl=re.compile('\n').search,
-             ol=re.compile(
-                 '[ \t]*(([0-9]+\|[a-zA-Z]+)[.)])+[ \t\n]+([^\0]*\|$)'
-                 ),
-             olp=re.compile('[ \t]*([0-9]+)[ \t\n]+([^\0]*\|$)'
-                                  ),
+             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
+                                 ).match_group,
+             nl=ts_regex.compile('\n').search,
+             ol=ts_regex.compile(
+                 '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string.letters
+                 ).match_group,
+             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
+                                  ).match_group,
              ):
         r=''
         for s in structure:
 
-            mo = bullet.match(s[0])
-            if mo is not None:
-                p = mo.group(0)
+            ts_results = bullet(s[0], (1,))
+            if ts_results:
+                p = ts_results[1]
                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                 else: ps=self._str(s[1],level)
                 r=self.ul(r,p,ps)
                 continue
-
-            mo = ol.match(s[0])
-            if mo is not None:
-                p = mo.group(2)
+            ts_results = ol(s[0], (3,))
+            if ts_results:
+                p = ts_results[1]
                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                 else: ps=self._str(s[1],level)
                 r=self.ol(r,p,ps)
                 continue
-
-            mo = opl.match(s[0])
-            if mo is not None:
-                p = mo.group(0)
+            ts_results = olp(s[0], (1,))
+            if ts_results:
+                p = ts_results[1]
                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                 else: ps=self._str(s[1],level)
                 r=self.ol(r,p,ps)
                 continue
-
-            mo = dl.match(s[0])
-            if mo is not None:
-                t,d = mo.group(0,1)
+            ts_results = dl(s[0], (1,2))
+            if ts_results:
+                t,d = ts_results[1]
                 r=self.dl(r,t,d,self._str(s[1],level))
                 continue
-
-            if example(s[0]) is not None and s[1]:
+            if example(s[0]) >= 0 and s[1]:
                 # Introduce an example, using pre tags:
                 r=self.normal(r,s[0],self.pre(s[1]))
                 continue
@@ -546,7 +542,7 @@
                 continue
             else:
 
-                if nl(s[0]) is None and s[1] and s[0][-1:] != ':':
+                if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
                     # Treat as a heading
                     t=s[0]
                     r=self.head(r,t,level,
@@ -565,23 +561,23 @@
                        )): #"
         text=str(v)
         for re,name in character_entities:
-            text=re.sub(re,name,text)
+            text=re.sub(name,text)
         return text
 
 def html_with_references(text, level=1):
     text = re.sub(
-        '[\0\n]\.\. \[([0-9_a-zA-Z-]+)\]',
-        '\n  <a name="\\1">[\\1]</a>',
+        r'[\0\n]\.\. \[([0-9_%s-]+)\]' % string.letters,
+        r'\n  <a name="\1">[\1]</a>',
         text)
-    
+
     text = re.sub(
-        '([\0- ,])\[([0-9_a-zA-Z-]+)\]([\0- ,.:])',
-        '\\1<a href="#\\2">[\\2]</a>\\3',
+        r'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])'   % string.letters,
+        r'\1<a href="#\2">[\2]</a>\3',
         text)
     
     text = re.sub(
-        '([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
-        '\\1<a href="\\2.html">[\\2]</a>\\3',
+        r'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
+        r'\1<a href="\2.html">[\2]</a>\3',
         text)
 
     return HTML(text,level=level)
@@ -590,7 +586,7 @@
 def main():
     import sys, getopt
 
-    opts,args=getopt.getopt(sys.argv[1:],'tw')
+    opts,args=getopt.getopt(sys.argv[1:],'twl')
 
     if args:
         [infile]=args
@@ -603,13 +599,17 @@
         if filter(lambda o: o[0]=='-w', opts):
             print 'Content-Type: text/html\n'
 
+        if filter(lambda o: o[0]=='-l', opts):
+            import locale
+            locale.setlocale(locale.LC_ALL,"")
+
         if s[:2]=='#!':
-            s=ts_regex.sub('^#![^\n]+','',s)
+            s=re.sub('^#![^\n]+','',s)
 
-        r=ts_regex.compile('\([\0-\n]*\n\)')
-        ts_results = r.match_group(s, (1,))
-        if ts_results:
-            s=s[len(ts_results[1]):]
+        mo = re.compile('([\0-\n]*\n)').match(s)
+        if mo is not None:
+            s = s[len(mo.group(0)) :]
+            
         s=str(html_with_references(s))
         if s[:4]=='<h1>':
             t=s[4:find(s,'</h1>')]