[Zope-Checkins] CVS: Zope/lib/python/docutils/parsers/rst - __init__.py:1.2 states.py:1.2 tableparser.py:1.2
Andreas Jung
andreas@andreas-jung.com
Sat, 1 Feb 2003 04:26:41 -0500
Update of /cvs-repository/Zope/lib/python/docutils/parsers/rst
In directory cvs.zope.org:/tmp/cvs-serv18056/docutils/parsers/rst
Added Files:
__init__.py states.py tableparser.py
Log Message:
merge from ajung-restructuredtext-integration-branch
=== Zope/lib/python/docutils/parsers/rst/__init__.py 1.1 => 1.2 ===
--- /dev/null Sat Feb 1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/__init__.py Sat Feb 1 04:26:07 2003
@@ -0,0 +1,120 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
+the reStructuredText parser.
+
+
+Usage
+=====
+
+1. Create a parser::
+
+ parser = docutils.parsers.rst.Parser()
+
+ Several optional arguments may be passed to modify the parser's behavior.
+ Please see `Customizing the Parser`_ below for details.
+
+2. Gather input (a multi-line string), by reading a file or the standard
+ input::
+
+ input = sys.stdin.read()
+
+3. Create a new empty `docutils.nodes.document` tree::
+
+ document = docutils.utils.new_document(source, settings)
+
+ See `docutils.utils.new_document()` for parameter details.
+
+4. Run the parser, populating the document tree::
+
+ parser.parse(input, document)
+
+
+Parser Overview
+===============
+
+The reStructuredText parser is implemented as a state machine, examining its
+input one line at a time. To understand how the parser works, please first
+become familiar with the `docutils.statemachine` module, then see the
+`states` module.
+
+
+Customizing the Parser
+----------------------
+
+Anything that isn't already customizable is that way simply because that type
+of customizability hasn't been implemented yet. Patches welcome!
+
+When instantiating an object of the `Parser` class, two parameters may be
+passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial
+RFC-2822 style header block, parsed as a "field_list" element (with "class"
+attribute set to "rfc2822"). Currently this is the only body-level element
+which is customizable without subclassing. (Tip: subclass `Parser` and change
+its "state_classes" and "initial_state" attributes to refer to new classes.
+Contact the author if you need more details.)
+
+The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
+It handles inline markup recognition. A common extension is the addition of
+further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
+`states.Inliner`, adding a new method for the implicit markup, and adding a
+``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
+subclass. See `states.Inliner.implicit_inline()` for details. Explicit
+inline markup can be customized in a `states.Inliner` subclass via the
+``patterns.initial`` and ``dispatch`` attributes (and new methods as
+appropriate).
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import docutils.parsers
+import docutils.statemachine
+from docutils.parsers.rst import states
+
+
+class Parser(docutils.parsers.Parser):
+
+ """The reStructuredText parser."""
+
+ supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
+ """Aliases this parser supports."""
+
+ settings_spec = (
+ 'reStructuredText Parser Options',
+ None,
+ (('Recognize and link to PEP references (like "PEP 258").',
+ ['--pep-references'],
+ {'action': 'store_true'}),
+ ('Recognize and link to RFC references (like "RFC 822").',
+ ['--rfc-references'],
+ {'action': 'store_true'}),
+ ('Set number of spaces for tab expansion (default 8).',
+ ['--tab-width'],
+ {'metavar': '<width>', 'type': 'int', 'default': 8}),))
+
+ def __init__(self, rfc2822=None, inliner=None):
+ if rfc2822:
+ self.initial_state = 'RFC2822Body'
+ else:
+ self.initial_state = 'Body'
+ self.state_classes = states.state_classes
+ self.inliner = inliner
+
+ def parse(self, inputstring, document):
+ """Parse `inputstring` and populate `document`, a document tree."""
+ self.setup_parse(inputstring, document)
+ debug = document.reporter[''].debug
+ self.statemachine = states.RSTStateMachine(
+ state_classes=self.state_classes,
+ initial_state=self.initial_state,
+ debug=debug)
+ inputlines = docutils.statemachine.string2lines(
+ inputstring, tab_width=document.settings.tab_width,
+ convert_whitespace=1)
+ self.statemachine.run(inputlines, document, inliner=self.inliner)
+ self.finish_parse()
=== Zope/lib/python/docutils/parsers/rst/states.py 1.1 => 1.2 === (2729/2829 lines abridged)
--- /dev/null Sat Feb 1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/states.py Sat Feb 1 04:26:07 2003
@@ -0,0 +1,2826 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This is the ``docutils.parsers.restructuredtext.states`` module, the core of
+the reStructuredText parser. It defines the following:
+
+:Classes:
+ - `RSTStateMachine`: reStructuredText parser's entry point.
+ - `NestedStateMachine`: recursive StateMachine.
+ - `RSTState`: reStructuredText State superclass.
+ - `Inliner`: For parsing inline markup.
+ - `Body`: Generic classifier of the first line of a block.
+ - `SpecializedBody`: Superclass for compound element members.
+ - `BulletList`: Second and subsequent bullet_list list_items
+ - `DefinitionList`: Second+ definition_list_items.
+ - `EnumeratedList`: Second+ enumerated_list list_items.
+ - `FieldList`: Second+ fields.
+ - `OptionList`: Second+ option_list_items.
+ - `RFC2822List`: Second+ RFC2822-style fields.
+ - `ExtensionOptions`: Parses directive option fields.
+ - `Explicit`: Second+ explicit markup constructs.
+ - `SubstitutionDef`: For embedded directives in substitution definitions.
+ - `Text`: Classifier of second line of a text block.
+ - `SpecializedText`: Superclass for continuation lines of Text-variants.
+ - `Definition`: Second line of potential definition_list_item.
+ - `Line`: Second line of overlined section title or transition marker.
+ - `Struct`: An auxiliary collection class.
+
+:Exception classes:
+ - `MarkupError`
+ - `ParserError`
+ - `MarkupMismatch`
+
+:Functions:
+ - `escape2null()`: Return a string, escape-backslashes converted to nulls.
+ - `unescape()`: Return a string, nulls removed or restored to backslashes.
+
+:Attributes:
+ - `state_classes`: set of State classes used with `RSTStateMachine`.
+
+Parser Overview
+===============
+
[-=- -=- -=- 2729 lines omitted -=- -=- -=-]
+ def underline(self, match, context, next_state):
+ overline = context[0]
+ blocktext = overline + '\n' + self.state_machine.line
+ lineno = self.state_machine.abs_line_number() - 1
+ if len(overline.rstrip()) < 4:
+ self.short_overline(context, blocktext, lineno, 1)
+ msg = self.reporter.error(
+ 'Invalid section title or transition marker.',
+ nodes.literal_block(blocktext, blocktext), line=lineno)
+ self.parent += msg
+ return [], 'Body', []
+
+ def short_overline(self, context, blocktext, lineno, lines=1):
+ msg = self.reporter.info(
+ 'Possible incomplete section title.\nTreating the overline as '
+ "ordinary text because it's so short.", line=lineno)
+ self.parent += msg
+ self.state_correction(context, lines)
+
+ def state_correction(self, context, lines=1):
+ self.state_machine.previous_line(lines)
+ context[:] = []
+ raise statemachine.StateCorrection('Body', 'text')
+
+
+state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
+ OptionList, ExtensionOptions, Explicit, Text, Definition,
+ Line, SubstitutionDef, RFC2822Body, RFC2822List)
+"""Standard set of State classes used to start `RSTStateMachine`."""
+
+
+def escape2null(text):
+ """Return a string with escape-backslashes converted to nulls."""
+ parts = []
+ start = 0
+ while 1:
+ found = text.find('\\', start)
+ if found == -1:
+ parts.append(text[start:])
+ return ''.join(parts)
+ parts.append(text[start:found])
+ parts.append('\x00' + text[found+1:found+2])
+ start = found + 2 # skip character after escape
+
+def unescape(text, restore_backslashes=0):
+ """Return a string with nulls removed or restored to backslashes."""
+ if restore_backslashes:
+ return text.replace('\x00', '\\')
+ else:
+ return ''.join(text.split('\x00'))
=== Zope/lib/python/docutils/parsers/rst/tableparser.py 1.1 => 1.2 === (433/533 lines abridged)
--- /dev/null Sat Feb 1 04:26:41 2003
+++ Zope/lib/python/docutils/parsers/rst/tableparser.py Sat Feb 1 04:26:07 2003
@@ -0,0 +1,530 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+This module defines table parser classes,which parse plaintext-graphic tables
+and produce a well-formed data structure suitable for building a CALS table.
+
+:Classes:
+ - `GridTableParser`: Parse fully-formed tables represented with a grid.
+ - `SimpleTableParser`: Parse simple tables, delimited by top & bottom
+ borders.
+
+:Exception class: `TableMarkupError`
+
+:Function:
+ `update_dict_of_lists()`: Merge two dictionaries containing list values.
+"""
+
+__docformat__ = 'reStructuredText'
+
+
+import re
+import sys
+from docutils import DataError
+
+
+class TableMarkupError(DataError): pass
+
+
+class TableParser:
+
+ """
+ Abstract superclass for the common parts of the syntax-specific parsers.
+ """
+
+ head_body_separator_pat = None
+ """Matches the row separator between head rows and body rows."""
+
+ def parse(self, block):
+ """
+ Analyze the text `block` and return a table data structure.
+
+ Given a plaintext-graphic table in `block` (list of lines of text; no
+ whitespace padding), parse the table, construct and return the data
[-=- -=- -=- 433 lines omitted -=- -=- -=-]
+ columns.append((sys.maxint, None))
+ lastcol = len(columns) - 2
+ for i in range(len(columns) - 1):
+ start, end = columns[i]
+ nextstart = columns[i+1][0]
+ block = []
+ margin = sys.maxint
+ for line, offset in lines:
+ if i == lastcol and line[end:].strip():
+ text = line[start:].rstrip()
+ columns[lastcol] = (start, start + len(text))
+ self.adjust_last_column(start + len(text))
+ elif line[end:nextstart].strip():
+ raise TableMarkupError('Text in column margin at line '
+ 'offset %s.' % offset)
+ else:
+ text = line[start:end].rstrip()
+ block.append(text)
+ if text:
+ margin = min(margin, len(text) - len(text.lstrip()))
+ if 0 < margin < sys.maxint:
+ block = [line[margin:] for line in block]
+ row[i][3].extend(block)
+ self.table.append(row)
+
+ def adjust_last_column(self, new_end):
+ start, end = self.columns[-1]
+ if new_end > end:
+ self.columns[-1] = (start, new_end)
+
+ def structure_from_cells(self):
+ colspecs = [end - start for start, end in self.columns]
+ first_body_row = 0
+ if self.head_body_sep:
+ for i in range(len(self.table)):
+ if self.table[i][0][2] > self.head_body_sep:
+ first_body_row = i
+ break
+ return (colspecs, self.table[:first_body_row],
+ self.table[first_body_row:])
+
+
+def update_dict_of_lists(master, newdata):
+ """
+ Extend the list values of `master` with those from `newdata`.
+
+ Both parameters must be dictionaries containing list values.
+ """
+ for key, values in newdata.items():
+ master.setdefault(key, []).extend(values)