[CMF-checkins] CVS: CMF/CMFCollector - util.py:1.10
Ken Manheimer
klm@zope.com
Sat, 3 Nov 2001 00:16:03 -0500
Update of /cvs-repository/CMF/CMFCollector
In directory cvs.zope.org:/tmp/cvs-serv21683
Modified Files:
util.py
Log Message:
format_webtext(): simple and more suitable text format for issues.
Characteristics:
- No html.
- Flow lines that begin at the left margin, but break them wherever the user entered a newline.
- Recognize as "literal" lines, whose whitespace is preserved:
indented lines, lines cited with leading '>', and lines contained
within <pre> / </pre> pairs
- Linkify URLs that are not on literal lines.
What's wrong with structured text, and/or HTML, or plain preformatted?
- Structured text structuring is unnecessary and often *undesirable*.
Also, structured text flows literal text, not good for source code,
and most people don't know the secret to prevent that.
- HTML (and structured text with HTML) interprets tags and other '<'/'>'
bracketed text. Like stx, HTML also flows literal text, ...
- Preformatted text requires that the user break lines, or you get an awfully
wide page. Using a hard-wrap entry box is *too* restrictive: it
prevents longer lines, e.g. in source code, tracebacks, etc.
We can now get rid of process_comment(), unprocess_comments(),
link_candidates(), and list_search_hits()!
=== CMF/CMFCollector/util.py 1.9 => 1.10 ===
import string, re
from Products.CMFCore.utils import getToolByName
-
-preexp = re.compile(r'<pre>')
-unpreexp = re.compile(r'</pre>')
-citedexp = re.compile(r'^\s*>')
-# Match group 1 is citation prefix, group 2 is leading whitespace:
-cite_prefixexp = re.compile('([\s>]*>)?([\s]*)')
+from DocumentTemplate.DT_Var import html_quote
def users_for_local_role(object, userids, role):
"""Give only designated userids specified local role.
@@ -86,17 +81,88 @@
except AttributeError:
# Some don't support getProperty:
return getattr(userobj, property, default)
-
-def cited_text(text, rfind=string.rfind, strip=string.strip):
+##############################
+# WebText processing utilities
+preexp = re.compile(r'<pre>')
+unpreexp = re.compile(r'</pre>')
+urlchars = (r'[A-Za-z0-9/:@_%~#=&\.\-\?]+')
+nonpuncurlchars = (r'[A-Za-z0-9/@_%~#=&\-]')
+url = (r'["=]?((http|https|ftp|mailto|file|about):%s%s)'
+ % (urlchars, nonpuncurlchars))
+urlexp=re.compile(url)
+
+def format_webtext(text,
+ presearch=preexp.search, presplit=preexp.split,
+ unpresearch=unpreexp.search, unpresplit=unpreexp.split,
+ urlexpsub=urlexp.sub):
+ """Transform web text for browser presentation.
+
+ - HTML quote everything
+ - Terminate all lines with <br>s.
+ - Whitespace-quote indented and '>' cited lines
+ - Whitespace-quote lines within <pre>/</pre> pairs
+ - Turn URLs recognized outside of literal regions into links."""
+
+ # Definitions:
+ #
+ # - "in_literal" exemptions: Lines starting with whitespace or '>'
+ # - "in_pre" exemptions: Lines residing within (non-exempted) <pre> tag
+ #
+ # Nuances:
+ #
+ # - Neither exemption can toggle while the other applies - each renders
+ # the cues for the other mostly ineffective, except...
+ # - in_pre cannot deactivate on a literal-exemption qualifying line, so
+ # pre tags can be used to contain cited text with (ineffective) </pre>s.
+ # - We mostly don't handle pre tag nesting, except balanced within a line
+
+ in_pre = at_literal = 0
+ got = []
+ for l in text.split('\n'):
+
+ if not l:
+ got.append(l)
+ continue
+
+ l = l.expandtabs()
+
+ at_literal = (l.startswith(" ") or l.startswith(">"))
+
+ if at_literal:
+ # Can't open or close <pre> in literal - since it's cited/escaped.
+ got.append(l.replace(" ", "&nbsp;"))
+
+ elif in_pre:
+ got.append(l.replace(" ", "&nbsp;"))
+ # Check for closing pre:
+ x = unpresplit(l)
+ if len(x) > 1 and not presearch(x[-1]):
+ in_pre = 0
+
+ else:
+ # Non-literal:
+ got.append(urlexpsub(r'<a href="\1">\1</a>', l))
+ # Check for opening pre:
+ x = presplit(l)
+ if len(x) > 1 and not unpresearch(x[-1]):
+ # The line has a prevailing <pre>.
+ in_pre = 1
+
+ return "<br>\n".join(got)
+
+# Match group 1 is citation prefix, group 2 is leading whitespace:
+cite_prefixexp = re.compile('([\s>]*>)?([\s]*)')
+
+def cited_text(text, cite_prefixexp=cite_prefixexp):
"""Quote text for use in literal citations.
We prepend '>' to each line, splitting long lines (propagating
existing citation and leading whitespace) when necessary."""
- # (Over?) elaborate stuff snarfed from my wiki commenting provisions.
+ # Over elaborate stuff snarfed from my wiki commenting provisions.
got = []
- for line in string.split(text, '\n'):
+ for line in text.split('\n'):
pref = '> '
if len(line) < 79:
got.append(pref + line)
@@ -128,15 +194,15 @@
break
else:
lastcurlen = curlen
- splitpoint = max(rfind(line[:78-lenpref], ' '),
- rfind(line[:78-lenpref], '\t'))
+ splitpoint = max(line[:78-lenpref].rfind(' '),
+ line[:78-lenpref].rfind('\t'))
if not splitpoint or splitpoint == -1:
- if strip(line):
+ if line.strip():
got.append((pref % continuation_padding) +
line)
line = ''
else:
- if strip(line[:splitpoint]):
+ if line[:splitpoint].strip():
got.append((pref % continuation_padding) +
line[:splitpoint])
line = line[splitpoint+1:]
@@ -146,129 +212,7 @@
continuation_padding = ' '
return string.join(got, '\n')
-def process_comment(comment, strip=string.strip):
- """Return formatted comment, escaping cited text."""
- # More elaborate stuff snarfed from my wiki commenting provisions.
- # Process the comment:
- # - Strip leading whitespace,
- # - cause all cited text to be preformatted.
-
- inpre = incited = atcited = 0
- presearch = preexp.search
- presplit = preexp.split
- unpresearch = unpreexp.search
- unpresplit = unpreexp.split
- citedsearch = citedexp.search
- got = []
- for i in string.split('\n' + string.rstrip(comment), '\n') + ['']:
- atcited = citedsearch(i)
- if not atcited:
- if incited:
- # Departing cited section.
- incited = 0
- if inpre:
- # Close <pre> that we prepended.
- got.append("</pre collector:deleteme>")
- inpre = 0
-
- # Check line for toggling of inpre.
- # XXX We don't deal well with way imbalanced pres on a
- # single line. Feh, we're working too hard, already.
- if not inpre:
- x = presplit(i)
- if len(x) > 1 and not unprexpsearch(x[-1]):
- # The line has a <pre> without subsequent </pre>
- inpre = 1
- else: # in <pre>
- x = unpresplit(i)
- if len(x) > 1 and not prexpsearch(x[-1]):
- # The line has a </pre> without subsequent <pre>
- inpre = 0
-
- else:
- # Quote the minimal set of chars, to reduce raw text
- # ugliness. Do the '&' *before* any others that include '&'s!
- if '&' in i and ';' in i: i = string.replace(i, '&', '&amp;')
- if '<' in i: i = string.replace(i, '<', '&lt;')
- if not incited:
- incited = 1
- if not inpre:
- got.append("<pre collector:deleteme>")
- inpre = 1
- got.append(i)
- return string.strip(string.join(got, '\n'))
-
-
-def unprocess_comments(text):
- """Invert the process_comment transformations to yield literal text.
-
- Specifically, remove (special) <pre>/</pre> and turn the small set of
- character entities back to characters."""
-
- if text.find("<pre collector:deleteme>\n"):
- text = text.replace("<pre collector:deleteme>\n", '')
- if text.find("</pre collector:deleteme>\n"):
- text = text.replace("</pre collector:deleteme>\n", '')
- if text.find('&amp;'):
- text = text.replace('&amp;', '&')
- if text.find('&lt;'):
- text = text.replace('&lt;', '<')
- return text
-
def sorted(l):
x = list(l[:])
x.sort()
return x
-
-urlchars = (r'[A-Za-z0-9/:@_%~#=&\.\-\?]+')
-nonpuncurlchars = (r'[A-Za-z0-9/:@_%~#=&\-]')
-url = (r'["=]?((http|https|ftp|mailto|file|about):%s%s)'
- % (urlchars, nonpuncurlchars))
-urlexp = re.compile(url)
-UPLOAD_PREFIX = "Uploaded: "
-uploadexp = re.compile('(%s)([^<,\n]*)([<,\n])' % UPLOAD_PREFIX, re.MULTILINE)
-excludeexpr=re.compile(r'(<a [^>]*href=[^>]+>[^<>]*</a>|<img [^>]*src=[^>]*>)')
-
-def link_candidates(text):
- """Return match objs for URLS, excluding targets contained in tags.
-
- (See the excludeexpr for the exact excluded tag contexts.)"""
-
- excludes = list_search_hits(text, excludeexpr)
- candidates = list_search_hits(text, urlexp)
- got = []
- for c in candidates:
- cstart, cend = c.start(), c.end()
- good = 1
- while excludes:
- e = excludes[0]
- if e.end() < cend:
- # Ditch exclude_match that's prior to remaining candidates.
- del excludes[0]
- continue
- elif e.start() > cstart:
- # Remaining segments are after candidate - it passes.
- good = 1
- break
- else:
- # Candidate is contained in segment - baad.
- good = 0
- break
-
- if good:
- got.append(c)
-
- return got
-
-def list_search_hits(text, exprobj):
- """Return a list of match objects for non-overlapping text hits."""
- cursor = 0
- got = []
- while 1:
- hit = exprobj.search(text, cursor)
- if hit:
- cursor = hit.end()
- got.append(hit)
- else:
- break
- return got