[CMF-checkins] CVS: CMF/CMFCollector - util.py:1.14
Ken Manheimer
klm@zope.com
Fri, 9 Nov 2001 14:56:51 -0500
Update of /cvs-repository/CMF/CMFCollector
In directory cvs.zope.org:/tmp/cvs-serv7825
Modified Files:
util.py
Log Message:
_webtext_format_line(): Avoid treacherous conflicts between
html_quote, whitespace, and URL link processing by doing the
processing all in one place. Simple.
(I could simplify things further by getting rid of provisions for
pseudo-<pre>...)
=== CMF/CMFCollector/util.py 1.13 => 1.14 ===
got = []
- text = html_quote_nonURLs(text)
-
for l in text.split('\n'):
if not l:
@@ -131,42 +129,61 @@
at_literal = (l.startswith(" ") or l.startswith(">"))
if at_literal:
- # Can't open or close <pre> in literal - since it's cited/escaped.
- got.append(l.replace(" ", "&nbsp;"))
+ # In a cited or leading-whitespace literal.
+ got.append(_webtext_format_line(l, do_whitespace=1))
elif in_pre:
- got.append(l.replace(" ", "&nbsp;"))
- # Check for closing pre:
+ # In a pre.
x = unpresplit(l)
if len(x) > 1 and not presearch(x[-1]):
in_pre = 0
+ got.append(_webtext_format_line(l, do_whitespace=1))
else:
- # Non-literal:
- got.append(urlexpsub(r'<a href="\1">\1</a>', l))
- # Check for opening pre:
+ # Non-literal case.
x = presplit(l)
if len(x) > 1 and not unpresearch(x[-1]):
# The line has a prevailing <pre>.
in_pre = 1
+ got.append(_webtext_format_line(l))
return "<br>\n".join(got)
-def html_quote_nonURLs(text):
- """html_quote everything in text except for URLs."""
- urlmatches = list_search_hits(text, urlexp)
- if not urlmatches:
- return html_quote(text)
+def _webtext_format_line(line,
+ do_html_quote=1, do_links=1, do_whitespace=0):
+ """
+ Turn URLs into links, and html_quote everything else."""
+
+ if do_links:
+ urlmatches = list_search_hits(line, urlexp)
else:
- got = []
- cursor = 0
- for m in urlmatches:
- curstart, curend = m.start(), m.end()
- got.append(html_quote(text[cursor:curstart]))
- got.append(text[curstart:curend])
- cursor = curend
- if cursor < len(text):
- got.append(html_quote(text[cursor:]))
+ urlmatches = []
+
+ got = []
+ cursor = 0
+ lenline = len(line)
+ while cursor < lenline:
+
+ if urlmatches:
+ urlmatch = urlmatches.pop(0)
+ curstart, curend = urlmatch.start(), urlmatch.end()
+ else:
+ urlmatch = None
+ curstart = curend = lenline
+
+ nonurl = line[cursor:curstart]
+ if do_html_quote:
+ nonurl = html_quote(nonurl)
+ if do_whitespace:
+ nonurl = nonurl.replace(" ", "&nbsp;")
+ got.append(nonurl)
+
+ if urlmatch:
+ url = line[curstart:curend]
+ got.append('<a href="%s">%s</a>' % (url, url))
+
+ cursor = curend
+
return "".join(got)
def list_search_hits(text, exprobj):
@@ -181,6 +198,28 @@
else:
break
return got
+
+def test_webtext_format_line():
+ wfl = _webtext_format_line
+ assert wfl("") == ""
+ assert wfl("x") == "x"
+ assert wfl(" ") == " "
+ assert wfl("& < >") == "&amp; &lt; &gt;"
+ assert wfl(" ", do_whitespace=1) == "&nbsp;"
+ subj = "http://www.zope.org and so on"
+ assert wfl(subj) == ('<a href="http://www.zope.org">'
+ 'http://www.zope.org</a>'
+ ' and so on')
+ assert wfl(subj, do_whitespace=1) == ('<a href="http://www.zope.org">'
+ 'http://www.zope.org</a>'
+ '&nbsp;and&nbsp;so&nbsp;on')
+ subj = "<http://www.zope.org&value=1> and so on"
+ assert wfl(subj) == ('&lt;<a href="http://www.zope.org&value=1">'
+ 'http://www.zope.org&value=1</a>&gt;'
+ ' and so on')
+ subj = "... http://www.zope.org&value=1"
+ assert wfl(subj) == ('... <a href="http://www.zope.org&value=1">'
+ 'http://www.zope.org&value=1</a>')
# Match group 1 is citation prefix, group 2 is leading whitespace: