[Zope-Checkins] CVS: Zope/lib/python/docutils/parsers/rst - roman.py:1.1.2.1 __init__.py:1.2.10.1 states.py:1.2.10.1 tableparser.py:1.2.10.1
Chris McDonough
chrism@zope.com
Mon, 21 Jul 2003 12:39:12 -0400
Update of /cvs-repository/Zope/lib/python/docutils/parsers/rst
In directory cvs.zope.org:/tmp/cvs-serv17213/lib/python/docutils/parsers/rst
Modified Files:
Tag: Zope-2_7-branch
__init__.py states.py tableparser.py
Added Files:
Tag: Zope-2_7-branch
roman.py
Log Message:
Merge changes from HEAD since the release of Zope 2.7a1 into the Zope-2_7-branch in preparation for release of Zope 2.7b1.
=== Added File Zope/lib/python/docutils/parsers/rst/roman.py ===
"""Convert to and from Roman numerals"""
__author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
__version__ = "1.4"
__date__ = "8 August 2001"
__copyright__ = """Copyright (c) 2001 Mark Pilgrim
This program is part of "Dive Into Python", a free Python tutorial for
experienced programmers. Visit http://diveintopython.org/ for the
latest version.
This program is free software; you can redistribute it and/or modify
it under the terms of the Python 2.1.1 license, available at
http://www.python.org/2.1.1/license.html
"""
import re
# Exception hierarchy.  Every error raised by this module derives from
# RomanError, so callers can catch that one base class.
class RomanError(Exception):
    """Base class for the errors raised by this module."""


class OutOfRangeError(RomanError):
    """Raised by toRoman for a number outside the range 1..4999."""


class NotIntegerError(RomanError):
    """Raised by toRoman for an in-range number with a fractional part."""


class InvalidRomanNumeralError(RomanError):
    """Raised by fromRoman for blank or malformed input."""
# Numeral-to-value table, in strictly descending order of value.  The
# subtractive pairs (CM, CD, XC, XL, IX, IV) are entries in their own
# right.  Both toRoman and fromRoman walk this table from top to bottom.
romanNumeralMap = (
    # thousands
    ('M', 1000),
    # hundreds
    ('CM', 900), ('D', 500), ('CD', 400), ('C', 100),
    # tens
    ('XC', 90), ('L', 50), ('XL', 40), ('X', 10),
    # ones
    ('IX', 9), ('V', 5), ('IV', 4), ('I', 1),
)
def toRoman(n):
    """Convert an integer to a Roman numeral string.

    `n` must satisfy 0 < n < 5000 and have no fractional part.

    Raises OutOfRangeError if `n` is outside 1..4999, and NotIntegerError
    if `n` is in range but is not a whole number.  The range check comes
    first, so an out-of-range fraction reports OutOfRangeError.
    """
    # Call-style ``raise`` and ``!=`` replace the Python-2-only
    # ``raise Class, "message"`` statement and ``<>`` operator; the forms
    # used here parse on both Python 2 and Python 3.
    if not (0 < n < 5000):
        raise OutOfRangeError("number out of range (must be 1..4999)")
    if int(n) != n:
        raise NotIntegerError("decimals can not be converted")
    # int(n) == n was just verified, so this conversion loses nothing and
    # lets divmod yield integer repeat counts even for input like 3.0.
    remaining = int(n)
    parts = []
    for numeral, integer in romanNumeralMap:
        # The table is in descending order, so taking as many copies of
        # each numeral as fit gives the same result as repeated subtraction.
        count, remaining = divmod(remaining, integer)
        parts.append(numeral * count)
    return "".join(parts)
# Pattern that matches a complete, well-formed Roman numeral (it also
# matches the empty string).  The end is anchored with ``\Z`` rather than
# ``$``: ``$`` also matches just before a trailing newline, which would
# let input such as "IV\n" pass as a valid numeral.
romanNumeralPattern = re.compile(r'''
    ^                   # beginning of string
    M{0,4}              # thousands - 0 to 4 M's
    (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
                        #            or 500-800 (D, followed by 0 to 3 C's)
    (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
                        #        or 50-80 (L, followed by 0 to 3 X's)
    (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
                        #        or 5-8 (V, followed by 0 to 3 I's)
    \Z                  # end of string (a trailing newline is rejected)
    ''', re.VERBOSE)
def fromRoman(s):
    """Convert a Roman numeral string to an integer.

    Raises InvalidRomanNumeralError if `s` is blank (any false value) or
    is not matched by `romanNumeralPattern`.
    """
    # Call-style ``raise`` replaces the Python-2-only
    # ``raise Class, "message"`` statement; this form parses on both
    # Python 2 and Python 3.
    if not s:
        raise InvalidRomanNumeralError('Input can not be blank')
    if not romanNumeralPattern.search(s):
        raise InvalidRomanNumeralError('Invalid Roman numeral: %s' % s)
    result = 0
    index = 0
    for numeral, integer in romanNumeralMap:
        # Consume every consecutive copy of this numeral found at the
        # current position before moving on to the next table entry.
        while s.startswith(numeral, index):
            result += integer
            index += len(numeral)
    return result
=== Zope/lib/python/docutils/parsers/rst/__init__.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/__init__.py:1.2 Sat Feb 1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/__init__.py Mon Jul 21 12:38:05 2003
@@ -95,7 +95,10 @@
{'action': 'store_true'}),
('Set number of spaces for tab expansion (default 8).',
['--tab-width'],
- {'metavar': '<width>', 'type': 'int', 'default': 8}),))
+ {'metavar': '<width>', 'type': 'int', 'default': 8}),
+ ('Remove spaces before footnote references.',
+ ['--trim-footnote-reference-space'],
+ {'action': 'store_true'}),))
def __init__(self, rfc2822=None, inliner=None):
if rfc2822:
=== Zope/lib/python/docutils/parsers/rst/states.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/states.py:1.2 Sat Feb 1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/states.py Mon Jul 21 12:38:05 2003
@@ -107,11 +107,12 @@
import sys
import re
+import roman
from types import TupleType
-from docutils import nodes, statemachine, utils, roman, urischemes
+from docutils import nodes, statemachine, utils, urischemes
from docutils import ApplicationError, DataError
from docutils.statemachine import StateMachineWS, StateWS
-from docutils.utils import normalize_name
+from docutils.nodes import fully_normalize_name as normalize_name
from docutils.parsers.rst import directives, languages, tableparser
from docutils.parsers.rst.languages import en as _fallback_language_module
@@ -159,6 +160,7 @@
language=self.language,
title_styles=[],
section_level=0,
+ section_bubble_up_kludge=0,
inliner=inliner)
self.document = document
self.attach_observer(document.note_source)
@@ -271,8 +273,10 @@
node=node, match_titles=match_titles)
state_machine.unlink()
new_offset = state_machine.abs_line_offset()
- # Adjustment for block if modified in nested parse:
- self.state_machine.next_line(len(block) - block_length)
+ # No `block.parent` implies disconnected -- lines aren't in sync:
+ if block.parent:
+ # Adjustment for block if modified in nested parse:
+ self.state_machine.next_line(len(block) - block_length)
return new_offset
def nested_list_parse(self, block, input_offset, node, initial_state,
@@ -340,6 +344,8 @@
return None
if level <= mylevel: # sibling or supersection
memo.section_level = level # bubble up to parent section
+ if len(style) == 2:
+ memo.section_bubble_up_kludge = 1
# back up 2 lines for underline title, 3 for overline title
self.state_machine.previous_line(len(style) + 1)
raise EOFError # let parent section re-evaluate
@@ -471,13 +477,15 @@
_interpreted_roles = {
# Values of ``None`` mean "not implemented yet":
- 'title-reference': 'title_reference_role',
- 'abbreviation': None,
- 'acronym': None,
+ 'title-reference': 'generic_interpreted_role',
+ 'abbreviation': 'generic_interpreted_role',
+ 'acronym': 'generic_interpreted_role',
'index': None,
- 'emphasis': None,
- 'strong': None,
- 'literal': None,
+ 'subscript': 'generic_interpreted_role',
+ 'superscript': 'generic_interpreted_role',
+ 'emphasis': 'generic_interpreted_role',
+ 'strong': 'generic_interpreted_role',
+ 'literal': 'generic_interpreted_role',
'named-reference': None,
'anonymous-reference': None,
'uri-reference': None,
@@ -487,7 +495,7 @@
'citation-reference': None,
'substitution-reference': None,
'target': None,
- }
+ 'restructuredtext-unimplemented-role': None}
"""Mapping of canonical interpreted text role name to method name.
Initializes a name to bound-method mapping in `__init__`."""
@@ -495,6 +503,18 @@
"""The role to use when no explicit role is given.
Override in subclasses."""
+ generic_roles = {'abbreviation': nodes.abbreviation,
+ 'acronym': nodes.acronym,
+ 'emphasis': nodes.emphasis,
+ 'literal': nodes.literal,
+ 'strong': nodes.strong,
+ 'subscript': nodes.subscript,
+ 'superscript': nodes.superscript,
+ 'title-reference': nodes.title_reference,}
+ """Mapping of canonical interpreted text role name to node class.
+ Used by the `generic_interpreted_role` method for simple, straightforward
+ roles (simple wrapping; no extra processing)."""
+
def __init__(self, roles=None):
"""
`roles` is a mapping of canonical role name to role function or bound
@@ -872,9 +892,11 @@
return uri
def interpreted(self, before, after, rawsource, text, role, lineno):
- role_function, messages = self.get_role_function(role, lineno)
+ role_function, canonical, messages = self.get_role_function(role,
+ lineno)
if role_function:
- nodelist, messages2 = role_function(role, rawsource, text, lineno)
+ nodelist, messages2 = role_function(canonical, rawsource, text,
+ lineno)
messages.extend(messages2)
return before, nodelist, after, messages
else:
@@ -885,34 +907,34 @@
msg_text = []
if role:
name = role.lower()
- canonical = None
- try:
- canonical = self.language.roles[name]
- except AttributeError, error:
- msg_text.append('Problem retrieving role entry from language '
- 'module %r: %s.' % (self.language, error))
- except KeyError:
- msg_text.append('No role entry for "%s" in module "%s".'
- % (role, self.language.__name__))
- if not canonical:
- try:
- canonical = _fallback_language_module.roles[name]
- msg_text.append('Using English fallback for role "%s".'
- % role)
- except KeyError:
- msg_text.append('Trying "%s" as canonical role name.'
- % role)
- # Should be an English name, but just in case:
- canonical = name
- if msg_text:
- message = self.reporter.info('\n'.join(msg_text), line=lineno)
- messages.append(message)
+ else:
+ name = self.default_interpreted_role
+ canonical = None
+ try:
+ canonical = self.language.roles[name]
+ except AttributeError, error:
+ msg_text.append('Problem retrieving role entry from language '
+ 'module %r: %s.' % (self.language, error))
+ except KeyError:
+ msg_text.append('No role entry for "%s" in module "%s".'
+ % (name, self.language.__name__))
+ if not canonical:
try:
- return self.interpreted_roles[canonical], messages
+ canonical = _fallback_language_module.roles[name]
+ msg_text.append('Using English fallback for role "%s".'
+ % name)
except KeyError:
- raise UnknownInterpretedRoleError(messages)
- else:
- return self.interpreted_roles[self.default_interpreted_role], []
+ msg_text.append('Trying "%s" as canonical role name.'
+ % name)
+ # Should be an English name, but just in case:
+ canonical = name
+ if msg_text:
+ message = self.reporter.info('\n'.join(msg_text), line=lineno)
+ messages.append(message)
+ try:
+ return self.interpreted_roles[canonical], canonical, messages
+ except KeyError:
+ raise UnknownInterpretedRoleError(messages)
def literal(self, match, lineno):
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
@@ -936,26 +958,22 @@
match, lineno, self.patterns.substitution_ref,
nodes.substitution_reference)
if len(inlines) == 1:
- subrefnode = inlines[0]
- if isinstance(subrefnode, nodes.substitution_reference):
- subreftext = subrefnode.astext()
- refname = normalize_name(subreftext)
- subrefnode['refname'] = refname
- self.document.note_substitution_ref(
- subrefnode)
+ subref_node = inlines[0]
+ if isinstance(subref_node, nodes.substitution_reference):
+ subref_text = subref_node.astext()
+ self.document.note_substitution_ref(subref_node, subref_text)
if endstring[-1:] == '_':
- referencenode = nodes.reference(
- '|%s%s' % (subreftext, endstring), '')
+ reference_node = nodes.reference(
+ '|%s%s' % (subref_text, endstring), '')
if endstring[-2:] == '__':
- referencenode['anonymous'] = 1
+ reference_node['anonymous'] = 1
self.document.note_anonymous_ref(
- referencenode)
+ reference_node)
else:
- referencenode['refname'] = refname
- self.document.note_refname(
- referencenode)
- referencenode += subrefnode
- inlines = [referencenode]
+ reference_node['refname'] = normalize_name(subref_text)
+ self.document.note_refname(reference_node)
+ reference_node += subref_node
+ inlines = [reference_node]
return before, inlines, remaining, sysmessages
def footnote_reference(self, match, lineno):
@@ -965,6 +983,9 @@
"""
label = match.group('footnotelabel')
refname = normalize_name(label)
+ string = match.string
+ before = string[:match.start('whole')]
+ remaining = string[match.end('whole'):]
if match.group('citationlabel'):
refnode = nodes.citation_reference('[%s]_' % label,
refname=refname)
@@ -986,10 +1007,9 @@
if refname:
refnode['refname'] = refname
self.document.note_footnote_ref(refnode)
- string = match.string
- matchstart = match.start('whole')
- matchend = match.end('whole')
- return (string[:matchstart], [refnode], string[matchend:], [])
+ if self.document.settings.trim_footnote_reference_space:
+ before = before.rstrip()
+ return (before, [refnode], remaining, [])
def reference(self, match, lineno, anonymous=None):
referencename = match.group('refname')
@@ -1084,8 +1104,15 @@
'_': reference,
'__': anonymous_reference}
- def title_reference_role(self, role, rawtext, text, lineno):
- return [nodes.title_reference(rawtext, text)], []
+ def generic_interpreted_role(self, role, rawtext, text, lineno):
+ try:
+ role_class = self.generic_roles[role]
+ except KeyError:
+ msg = self.reporter.error('Unknown interpreted text role: "%s".'
+ % role, line=lineno)
+ prb = self.problematic(text, text, msg)
+ return [prb], [msg]
+ return [role_class(rawtext, text)], []
def pep_reference_role(self, role, rawtext, text, lineno):
try:
@@ -1208,16 +1235,72 @@
"""Block quote."""
indented, indent, line_offset, blank_finish = \
self.state_machine.get_indented()
- blockquote = self.block_quote(indented, line_offset)
+ blockquote, messages = self.block_quote(indented, line_offset)
self.parent += blockquote
+ self.parent += messages
if not blank_finish:
self.parent += self.unindent_warning('Block quote')
return context, next_state, []
def block_quote(self, indented, line_offset):
+ blockquote_lines, attribution_lines, attribution_offset = \
+ self.check_attribution(indented, line_offset)
blockquote = nodes.block_quote()
- self.nested_parse(indented, line_offset, blockquote)
- return blockquote
+ self.nested_parse(blockquote_lines, line_offset, blockquote)
+ messages = []
+ if attribution_lines:
+ attribution, messages = self.parse_attribution(attribution_lines,
+ attribution_offset)
+ blockquote += attribution
+ return blockquote, messages
+
+ attribution_pattern = re.compile(r'--(?![-\n]) *(?=[^ \n])')
+
+ def check_attribution(self, indented, line_offset):
+ """
+ Check for an attribution in the last contiguous block of `indented`.
+
+ * First line after last blank line must begin with "--" (etc.).
+ * Every line after that must have consistent indentation.
+
+ Return a 3-tuple: (block quote lines, attribution lines,
+ attribution offset).
+ """
+ blank = None
+ nonblank_seen = None
+ indent = 0
+ for i in range(len(indented) - 1, 0, -1): # don't check first line
+ this_line_blank = not indented[i].strip()
+ if nonblank_seen and this_line_blank:
+ match = self.attribution_pattern.match(indented[i + 1])
+ if match:
+ blank = i
+ break
+ elif not this_line_blank:
+ nonblank_seen = 1
+ if blank and len(indented) - blank > 2: # multi-line attribution
+ indent = (len(indented[blank + 2])
+ - len(indented[blank + 2].lstrip()))
+ for j in range(blank + 3, len(indented)):
+ if indent != (len(indented[j])
+ - len(indented[j].lstrip())): # bad shape
+ blank = None
+ break
+ if blank:
+ a_lines = indented[blank + 1:]
+ a_lines.trim_left(match.end(), end=1)
+ a_lines.trim_left(indent, start=1)
+ return (indented[:blank], a_lines, line_offset + blank + 1)
+ else:
+ return (indented, None, None)
+
+ def parse_attribution(self, indented, line_offset):
+ text = '\n'.join(indented).rstrip()
+ lineno = self.state_machine.abs_line_number() + line_offset
+ textnodes, messages = self.inline_text(text, lineno)
+ node = nodes.attribution(text, '', *textnodes)
+ node.line = lineno
+ return node, messages
def bullet(self, match, context, next_state):
"""Bullet list item."""
@@ -1436,8 +1519,9 @@
self.parent += msg
indented, indent, line_offset, blank_finish = \
self.state_machine.get_first_known_indented(match.end())
- blockquote = self.block_quote(indented, line_offset)
+ blockquote, messages = self.block_quote(indented, line_offset)
self.parent += blockquote
+ self.parent += messages
if not blank_finish:
self.parent += self.unindent_warning('Option list')
return [], next_state, []
@@ -1689,6 +1773,7 @@
(?P=quote) # close quote if open quote used
)
%(non_whitespace_escape_before)s
+ [ ]? # optional space
: # end of reference name
([ ]+|$) # followed by whitespace
""" % vars(Inliner), re.VERBOSE),
@@ -1864,34 +1949,31 @@
while block and not block[-1].strip():
block.pop()
subname = subdefmatch.group('name')
- name = normalize_name(subname)
- substitutionnode = nodes.substitution_definition(
- blocktext, name=name, alt=subname)
- substitutionnode.line = lineno
+ substitution_node = nodes.substitution_definition(blocktext)
+ substitution_node.line = lineno
+ self.document.note_substitution_def(
+ substitution_node,subname, self.parent)
if block:
block[0] = block[0].strip()
new_abs_offset, blank_finish = self.nested_list_parse(
- block, input_offset=offset, node=substitutionnode,
+ block, input_offset=offset, node=substitution_node,
initial_state='SubstitutionDef', blank_finish=blank_finish)
i = 0
- for node in substitutionnode[:]:
+ for node in substitution_node[:]:
if not (isinstance(node, nodes.Inline) or
isinstance(node, nodes.Text)):
- self.parent += substitutionnode[i]
- del substitutionnode[i]
+ self.parent += substitution_node[i]
+ del substitution_node[i]
else:
i += 1
- if len(substitutionnode) == 0:
+ if len(substitution_node) == 0:
msg = self.reporter.warning(
'Substitution definition "%s" empty or invalid.'
% subname,
nodes.literal_block(blocktext, blocktext), line=lineno)
return [msg], blank_finish
else:
- del substitutionnode['alt']
- self.document.note_substitution_def(
- substitutionnode, self.parent)
- return [substitutionnode], blank_finish
+ return [substitution_node], blank_finish
else:
msg = self.reporter.warning(
'Substitution definition "%s" missing contents.' % subname,
@@ -2112,6 +2194,7 @@
re.compile(r"""
\.\.[ ]+ # explicit markup start
(%s) # directive name
+ [ ]? # optional space
:: # directive delimiter
([ ]+|$) # whitespace or end of line
""" % Inliner.simplename, re.VERBOSE | re.UNICODE))]
@@ -2147,7 +2230,8 @@
self.state_machine.input_lines[offset:],
input_offset=self.state_machine.abs_line_offset() + 1,
node=self.parent, initial_state='Explicit',
- blank_finish=blank_finish)
+ blank_finish=blank_finish,
+ match_titles=self.state_machine.match_titles)
self.goto_line(newline_offset)
if not blank_finish:
self.parent += self.unindent_warning('Explicit markup')
@@ -2452,11 +2536,8 @@
initial_transitions = ['embedded_directive', 'text']
def embedded_directive(self, match, context, next_state):
- if self.parent.has_key('alt'):
- option_presets = {'alt': self.parent['alt']}
- else:
- option_presets = {}
- nodelist, blank_finish = self.directive(match, **option_presets)
+ nodelist, blank_finish = self.directive(match,
+ alt=self.parent['name'])
self.parent += nodelist
if not self.state_machine.at_eof():
self.blank_finish = blank_finish
@@ -2591,8 +2672,9 @@
self.state_machine.get_indented()
definitionlistitem = nodes.definition_list_item(
'\n'.join(termline + list(indented)))
- termlist, messages = self.term(
- termline, self.state_machine.abs_line_number() - 1)
+ lineno = self.state_machine.abs_line_number() - 1
+ definitionlistitem.line = lineno
+ termlist, messages = self.term(termline, lineno)
definitionlistitem += termlist
definition = nodes.definition('', *messages)
definitionlistitem += definition
@@ -2678,7 +2760,9 @@
def eof(self, context):
"""Transition marker at end of section or document."""
marker = context[0].strip()
- if len(marker) < 4:
+ if self.memo.section_bubble_up_kludge:
+ self.memo.section_bubble_up_kludge = 0
+ elif len(marker) < 4:
self.state_correction(context)
if self.eofcheck: # ignore EOFError with sections
lineno = self.state_machine.abs_line_number() - 1
@@ -2741,7 +2825,7 @@
self.short_overline(context, blocktext, lineno, 2)
else:
msg = self.reporter.severe(
- 'Missing underline for overline.',
+ 'Missing matching underline for section title overline.',
nodes.literal_block(source, source), line=lineno)
self.parent += msg
return [], 'Body', []
@@ -2819,8 +2903,13 @@
start = found + 2 # skip character after escape
def unescape(text, restore_backslashes=0):
- """Return a string with nulls removed or restored to backslashes."""
+ """
+ Return a string with nulls removed or restored to backslashes.
+ Backslash-escaped spaces are also removed.
+ """
if restore_backslashes:
return text.replace('\x00', '\\')
else:
- return ''.join(text.split('\x00'))
+ for sep in ['\x00 ', '\x00\n', '\x00']:
+ text = ''.join(text.split(sep))
+ return text
=== Zope/lib/python/docutils/parsers/rst/tableparser.py 1.2 => 1.2.10.1 ===
--- Zope/lib/python/docutils/parsers/rst/tableparser.py:1.2 Sat Feb 1 04:26:07 2003
+++ Zope/lib/python/docutils/parsers/rst/tableparser.py Mon Jul 21 12:38:06 2003
@@ -131,7 +131,8 @@
head_body_separator_pat = re.compile(r'\+=[=+]+=\+ *$')
def setup(self, block):
- self.block = list(block) # make a copy; it may be modified
+ self.block = block[:] # make a copy; it may be modified
+ self.block.disconnect() # don't propagate changes to parent
self.bottom = len(block) - 1
self.right = len(block[0]) - 1
self.head_body_sep = None
@@ -165,7 +166,9 @@
update_dict_of_lists(self.rowseps, rowseps)
update_dict_of_lists(self.colseps, colseps)
self.mark_done(top, left, bottom, right)
- cellblock = self.get_cell_block(top, left, bottom, right)
+ cellblock = self.block.get_2D_block(top + 1, left + 1,
+ bottom, right)
+ cellblock.disconnect() # lines in cell can't sync with parent
self.cells.append((top, left, bottom, right, cellblock))
corners.extend([(top, right), (bottom, left)])
corners.sort()
@@ -188,19 +191,6 @@
return None
return 1
- def get_cell_block(self, top, left, bottom, right):
- """Given the corners, extract the text of a cell."""
- cellblock = []
- margin = right
- for lineno in range(top + 1, bottom):
- line = self.block[lineno][left + 1 : right].rstrip()
- cellblock.append(line)
- if line:
- margin = min(margin, len(line) - len(line.lstrip()))
- if 0 < margin < right:
- cellblock = [line[margin:] for line in cellblock]
- return cellblock
-
def scan_cell(self, top, left):
"""Starting at the top-left corner, start tracing out a cell."""
assert self.block[top][left] == '+'
@@ -278,7 +268,7 @@
def structure_from_cells(self):
"""
- From the data colledted by `scan_cell()`, convert to the final data
+ From the data collected by `scan_cell()`, convert to the final data
structure.
"""
rowseps = self.rowseps.keys() # list of row boundaries
@@ -371,7 +361,8 @@
span_pat = re.compile('-[ -]*$')
def setup(self, block):
- self.block = list(block) # make a copy; it will be modified
+ self.block = block[:] # make a copy; it will be modified
+ self.block.disconnect() # don't propagate changes to parent
# Convert top & bottom borders to column span underlines:
self.block[0] = self.block[0].replace('=', '-')
self.block[-1] = self.block[-1].replace('=', '-')
@@ -394,25 +385,26 @@
self.columns = self.parse_columns(self.block[0], 0)
self.border_end = self.columns[-1][1]
firststart, firstend = self.columns[0]
- block = self.block[1:]
- offset = 0
- # Container for accumulating text lines until a row is complete:
- rowlines = []
- while block:
- line = block.pop(0)
- offset += 1
+ offset = 1 # skip top border
+ start = 1
+ text_found = None
+ while offset < len(self.block):
+ line = self.block[offset]
if self.span_pat.match(line):
# Column span underline or border; row is complete.
- self.parse_row(rowlines, (line.rstrip(), offset))
- rowlines = []
+ self.parse_row(self.block[start:offset], start,
+ (line.rstrip(), offset))
+ start = offset + 1
+ text_found = None
elif line[firststart:firstend].strip():
# First column not blank, therefore it's a new row.
- if rowlines:
- self.parse_row(rowlines)
- rowlines = [(line.rstrip(), offset)]
- else:
- # Accumulate lines of incomplete row.
- rowlines.append((line.rstrip(), offset))
+ if text_found and offset != start:
+ self.parse_row(self.block[start:offset], start)
+ start = offset
+ text_found = 1
+ elif not text_found:
+ start = offset + 1
+ offset += 1
def parse_columns(self, line, offset):
"""
@@ -448,12 +440,12 @@
morecols += 1
except (AssertionError, IndexError):
raise TableMarkupError('Column span alignment problem at '
- 'line offset %s.' % offset)
- cells.append((0, morecols, offset, []))
+ 'line offset %s.' % (offset + 1))
+ cells.append([0, morecols, offset, []])
i += 1
return cells
- def parse_row(self, lines, spanline=None):
+ def parse_row(self, lines, start, spanline=None):
"""
Given the text `lines` of a row, parse it and append to `self.table`.
@@ -462,20 +454,30 @@
text from each line, and check for text in column margins. Finally,
adjust for insigificant whitespace.
"""
- while lines and not lines[-1][0]:
- lines.pop() # Remove blank trailing lines.
- if lines:
- offset = lines[0][1]
- elif spanline:
- offset = spanline[1]
- else:
+ if not (lines or spanline):
# No new row, just blank lines.
return
if spanline:
columns = self.parse_columns(*spanline)
+ span_offset = spanline[1]
else:
columns = self.columns[:]
- row = self.init_row(columns, offset)
+ span_offset = start
+ self.check_columns(lines, start, columns)
+ row = self.init_row(columns, start)
+ for i in range(len(columns)):
+ start, end = columns[i]
+ cellblock = lines.get_2D_block(0, start, len(lines), end)
+ cellblock.disconnect() # lines in cell can't sync with parent
+ row[i][3] = cellblock
+ self.table.append(row)
+
+ def check_columns(self, lines, first_line, columns):
+ """
+ Check for text in column margins and text overflow in the last column.
+ Raise TableMarkupError if anything but whitespace is in column margins.
+ Adjust the end value for the last column if there is text overflow.
+ """
# "Infinite" value for a dummy last column's beginning, used to
# check for text overflow:
columns.append((sys.maxint, None))
@@ -483,30 +485,20 @@
for i in range(len(columns) - 1):
start, end = columns[i]
nextstart = columns[i+1][0]
- block = []
- margin = sys.maxint
- for line, offset in lines:
+ offset = 0
+ for line in lines:
if i == lastcol and line[end:].strip():
text = line[start:].rstrip()
- columns[lastcol] = (start, start + len(text))
- self.adjust_last_column(start + len(text))
+ new_end = start + len(text)
+ columns[i] = (start, new_end)
+ main_start, main_end = self.columns[-1]
+ if new_end > main_end:
+ self.columns[-1] = (main_start, new_end)
elif line[end:nextstart].strip():
raise TableMarkupError('Text in column margin at line '
- 'offset %s.' % offset)
- else:
- text = line[start:end].rstrip()
- block.append(text)
- if text:
- margin = min(margin, len(text) - len(text.lstrip()))
- if 0 < margin < sys.maxint:
- block = [line[margin:] for line in block]
- row[i][3].extend(block)
- self.table.append(row)
-
- def adjust_last_column(self, new_end):
- start, end = self.columns[-1]
- if new_end > end:
- self.columns[-1] = (start, new_end)
+ 'offset %s.' % (first_line + offset))
+ offset += 1
+ columns.pop()
def structure_from_cells(self):
colspecs = [end - start for start, end in self.columns]