| # $Id: states.py 6314 2010-04-26 10:04:17Z milde $ |
| # Author: David Goodger <[email protected]> |
| # Copyright: This module has been placed in the public domain. |
| |
| """ |
| This is the ``docutils.parsers.restructuredtext.states`` module, the core of |
| the reStructuredText parser. It defines the following: |
| |
| :Classes: |
| - `RSTStateMachine`: reStructuredText parser's entry point. |
| - `NestedStateMachine`: recursive StateMachine. |
| - `RSTState`: reStructuredText State superclass. |
| - `Inliner`: For parsing inline markup. |
| - `Body`: Generic classifier of the first line of a block. |
| - `SpecializedBody`: Superclass for compound element members. |
| - `BulletList`: Second and subsequent bullet_list list_items |
| - `DefinitionList`: Second+ definition_list_items. |
| - `EnumeratedList`: Second+ enumerated_list list_items. |
| - `FieldList`: Second+ fields. |
| - `OptionList`: Second+ option_list_items. |
| - `RFC2822List`: Second+ RFC2822-style fields. |
| - `ExtensionOptions`: Parses directive option fields. |
| - `Explicit`: Second+ explicit markup constructs. |
| - `SubstitutionDef`: For embedded directives in substitution definitions. |
| - `Text`: Classifier of second line of a text block. |
| - `SpecializedText`: Superclass for continuation lines of Text-variants. |
| - `Definition`: Second line of potential definition_list_item. |
| - `Line`: Second line of overlined section title or transition marker. |
| - `Struct`: An auxiliary collection class. |
| |
| :Exception classes: |
| - `MarkupError` |
| - `ParserError` |
| - `MarkupMismatch` |
| |
| :Functions: |
| - `escape2null()`: Return a string, escape-backslashes converted to nulls. |
| - `unescape()`: Return a string, nulls removed or restored to backslashes. |
| |
| :Attributes: |
| - `state_classes`: set of State classes used with `RSTStateMachine`. |
| |
| Parser Overview |
| =============== |
| |
| The reStructuredText parser is implemented as a recursive state machine, |
| examining its input one line at a time. To understand how the parser works, |
| please first become familiar with the `docutils.statemachine` module. In the |
| description below, references are made to classes defined in this module; |
| please see the individual classes for details. |
| |
| Parsing proceeds as follows: |
| |
| 1. The state machine examines each line of input, checking each of the |
| transition patterns of the state `Body`, in order, looking for a match. |
| The implicit transitions (blank lines and indentation) are checked before |
| any others. The 'text' transition is a catch-all (matches anything). |
| |
| 2. The method associated with the matched transition pattern is called. |
| |
| A. Some transition methods are self-contained, appending elements to the |
| document tree (`Body.doctest` parses a doctest block). The parser's |
| current line index is advanced to the end of the element, and parsing |
| continues with step 1. |
| |
| B. Other transition methods trigger the creation of a nested state machine, |
| whose job is to parse a compound construct ('indent' does a block quote, |
| 'bullet' does a bullet list, 'overline' does a section [first checking |
| for a valid section header], etc.). |
| |
| - In the case of lists and explicit markup, a one-off state machine is |
| created and run to parse contents of the first item. |
| |
| - A new state machine is created and its initial state is set to the |
| appropriate specialized state (`BulletList` in the case of the |
| 'bullet' transition; see `SpecializedBody` for more detail). This |
| state machine is run to parse the compound element (or series of |
| explicit markup elements), and returns as soon as a non-member element |
| is encountered. For example, the `BulletList` state machine ends as |
| soon as it encounters an element which is not a list item of that |
| bullet list. The optional omission of inter-element blank lines is |
| enabled by this nested state machine. |
| |
| - The current line index is advanced to the end of the elements parsed, |
| and parsing continues with step 1. |
| |
| C. The result of the 'text' transition depends on the next line of text. |
| The current state is changed to `Text`, under which the second line is |
| examined. If the second line is: |
| |
| - Indented: The element is a definition list item, and parsing proceeds |
| similarly to step 2.B, using the `DefinitionList` state. |
| |
| - A line of uniform punctuation characters: The element is a section |
| header; again, parsing proceeds as in step 2.B, and `Body` is still |
| used. |
| |
| - Anything else: The element is a paragraph, which is examined for |
| inline markup and appended to the parent element. Processing |
| continues with step 1. |
| """ |
| |
# Names the markup used in this module's docstrings (the module-level
# `__docformat__` convention described in PEP 258).
__docformat__ = 'reStructuredText'
| |
| |
| import sys |
| import re |
| import roman |
| from types import FunctionType, MethodType |
| from docutils import nodes, statemachine, utils, urischemes |
| from docutils import ApplicationError, DataError |
| from docutils.statemachine import StateMachineWS, StateWS |
| from docutils.nodes import fully_normalize_name as normalize_name |
| from docutils.nodes import whitespace_normalize_name |
| from docutils.utils import escape2null, unescape, column_width |
| import docutils.parsers.rst |
| from docutils.parsers.rst import directives, languages, tableparser, roles |
| from docutils.parsers.rst.languages import en as _fallback_language_module |
| |
| |
class MarkupError(DataError):

    """A `DataError` subclass; adds no behaviour of its own."""
class UnknownInterpretedRoleError(DataError):

    """A `DataError` subclass; adds no behaviour of its own."""
class InterpretedRoleNotImplementedError(DataError):

    """A `DataError` subclass; adds no behaviour of its own."""
class ParserError(ApplicationError):

    """An `ApplicationError` subclass; adds no behaviour of its own."""
class MarkupMismatch(Exception):

    """A plain `Exception` subclass; adds no behaviour of its own."""
| |
| |
class Struct:

    """Attribute bag: each keyword argument becomes an instance attribute."""

    def __init__(self, **keywordargs):
        # Write straight into the instance namespace, one entry per keyword.
        vars(self).update(keywordargs)
| |
| |
class RSTStateMachine(StateMachineWS):

    """
    The top-level StateMachine of the reStructuredText parser.

    Parsing starts with a call to `run()`.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=1,
            inliner=None):
        """
        Parse `input_lines`, modifying the `document` node in place.

        Extends `StateMachineWS.run()`: the parse-global `memo` (document,
        reporter, language, title styles, section level and inliner) is
        built first, then the state machine is run.  When no `inliner` is
        given a new `Inliner` is created; in either case its
        `init_customizations()` is called with the document settings.
        """
        language = languages.get_language(document.settings.language_code)
        self.language = language
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        reporter = document.reporter
        memo = Struct(document=document,
                      reporter=reporter,
                      language=language,
                      title_styles=[],
                      section_level=0,
                      section_bubble_up_kludge=0,
                      inliner=inliner)
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        # Drop the references that are no longer needed once parsing is done.
        self.node = None
        self.memo = None
| |
| |
class NestedStateMachine(StateMachineWS):

    """
    StateMachine started from inside another StateMachine's run, used for
    parsing nested document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=1):
        """
        Parse `input_lines`, attaching the result to `node`.

        Extends `StateMachineWS.run()`: the document-wide data (document,
        reporter, language) is taken from the shared `memo` before the
        state machine is run.  Returns the (empty) result list.
        """
        document = memo.document
        self.match_titles = match_titles
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], (
            'NestedStateMachine.run() results should be empty!')
        return results
| |
| |
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    # Idle nested state machines that were built with the default class and
    # keyword arguments; `nested_parse()` pops from and pushes back onto this
    # list.  Being a class attribute, it is shared by all state instances.
    nested_sm_cache = []

    def __init__(self, state_machine, debug=0):
        """
        Record the default nested state machine keyword arguments, then
        defer to `StateWS.__init__()`.
        """
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        """
        Extend `StateWS.runtime_init()`: copy the parse-global objects from
        the state machine's memo onto this state and set `self.parent` to
        the state machine's current node.
        """
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'locator'):
            self.reporter.locator = self.state_machine.get_source_and_line

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            # Deliberate: a jump past the end of the input is not an error.
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.

        Returns `context`, ``None`` as the next state, and an empty result
        list.
        """
        src, srcline = self.state_machine.get_source_and_line()
        self.reporter.severe(
            'Internal error: no transition pattern match.  State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line),
            source=src, line=srcline)
        return context, None, []

    def bof(self, context):
        """Called at beginning of file.  Return two empty lists."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=0,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.

        When both `state_machine_class` and `state_machine_kwargs` are left
        at their defaults, a state machine is taken from (and afterwards
        returned to) `nested_sm_cache`; otherwise a fresh one is created and
        unlinked after use.

        Return the absolute line offset reached by the nested state machine.
        """
        use_default = (state_machine_class is None
                       and state_machine_kwargs is None)
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        block_length = len(block)

        state_machine = None
        if use_default:
            try:
                state_machine = self.nested_sm_cache.pop()
            except IndexError:
                # Cache is empty; a new state machine is built below.
                pass
        # Identity test: only "nothing obtained from the cache" should
        # trigger construction, independent of the object's truth value.
        if state_machine is None:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if use_default:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        length_change = len(block) - block_length
        if block.parent and length_change != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(length_change)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings=None,
                          match_titles=0,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`. Also keep track of optional intermediate blank lines and the
        required final one.

        `extra_settings` is an optional mapping of attribute names to values
        that are set on the `initial_state` state object before the run.
        `blank_finish_state` names the state whose `blank_finish` attribute
        is primed and read back; it defaults to `initial_state`.

        Return a tuple: the absolute line offset reached, and the final
        `blank_finish` value.
        """
        if extra_settings is None:
            extra_settings = {}
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        # Work on a copy so that neither the instance defaults nor a
        # caller-supplied mapping is modified by the assignment below.
        state_machine_kwargs = dict(state_machine_kwargs)
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return 1 (true) or None (false).

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:                            # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:              # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return 1
            else:                       # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return None
        if level <= mylevel:            # sibling or supersection
            memo.section_level = level  # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = 1
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError              # let parent section re-evaluate
        if level == mylevel + 1:        # immediate subsection
            return 1
        else:                           # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return None

    def title_inconsistent(self, sourcetext, lineno):
        """
        Return a severe "Title level inconsistent" system message that
        quotes `sourcetext` in a literal block.
        """
        src, srcline = self.state_machine.get_source_and_line(lineno)
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            source=src, line=srcline)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        # Parse everything after the current line as the section's content.
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=1)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:   # can't handle next section?
            raise EOFError                  # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?

        A trailing, unescaped ``::`` signals a following literal block.  A
        paragraph consisting of ``::`` only yields an empty list.
        """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                return [], 1
            elif data[-3] in ' \n':
                # "text ::" -- drop both colons and the whitespace.
                text = data[:-3].rstrip()
            else:
                # "text::" -- keep a single colon.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        return self.inliner.parse(text, lineno, self.memo, self.parent)

    def unindent_warning(self, node_name):
        """
        Return a warning system message stating that `node_name` ended
        without a blank line.
        """
        # the actual problem is one line below the current line
        src, srcline = self.state_machine.get_source_and_line()
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     source=src, line=srcline+1)
| |
| |
def build_regexp(definition, compile=1):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    Nested definitions may be any tuple (including tuple subclasses such as
    named tuples); they are expanded recursively.  If `compile` is false,
    the regular expression is returned as an uncompiled string; otherwise it
    is compiled with the ``re.UNICODE`` flag.
    """
    name, prefix, suffix, parts = definition
    # Nested definitions are always expanded to strings (never compiled).
    part_strings = [build_regexp(part, None) if isinstance(part, tuple)
                    else part
                    for part in parts]
    or_group = '|'.join(part_strings)
    regexp = '%s(?P<%s>%s)%s' % (prefix, name, or_group, suffix)
    if compile:
        return re.compile(regexp, re.UNICODE)
    return regexp
| |
| |
| class Inliner: |
| |
| """ |
| Parse inline markup; call the `parse()` method. |
| """ |
| |
def __init__(self):
    # Standalone URIs are always recognized; `init_customizations()` may
    # append further entries.
    uri_entry = (self.patterns.uri, self.standalone_uri)
    self.implicit_dispatch = [uri_entry]
    """List of (pattern, bound method) tuples, used by
    `self.implicit_inline`."""
| |
def init_customizations(self, settings):
    """
    Apply setting-dependent customizations; run when parsing begins.

    Appends the PEP and/or RFC (pattern, method) pairs to
    `self.implicit_dispatch` when the corresponding `settings` flags
    (`pep_references`, `rfc_references`) are true.
    """
    extras = []
    if settings.pep_references:
        extras.append((self.patterns.pep, self.pep_reference))
    if settings.rfc_references:
        extras.append((self.patterns.rfc, self.rfc_reference))
    for entry in extras:
        self.implicit_dispatch.append(entry)
| |
def parse(self, text, lineno, memo, parent):
    # Needs to be refactored for nested inline markup.
    # Add nested_parse() method?
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.

    The text is scanned repeatedly with `self.patterns.initial`, which
    finds start-strings (emphasis, strong, interpreted, phrase reference,
    literal, substitution reference, inline target) as well as complete
    constructs (simple reference, footnote reference).  Each hit is handed
    to the method registered for it in `self.dispatch`, which returns the
    text before the construct, the resulting inline nodes, the text still
    to be scanned, and any system messages.  Text that ends up outside
    every explicit construct is passed through `self.implicit_inline()`
    (e.g. standalone URIs) last.
    """
    self.reporter = memo.reporter
    self.document = memo.document
    self.language = memo.language
    self.parent = parent
    find_start = self.patterns.initial.search
    handlers = self.dispatch
    rest = escape2null(text)
    nodelist = []
    pending = []        # text fragments not yet checked for implicit markup
    messages = []
    while rest:
        found = find_start(rest)
        if not found:
            break
        named = found.groupdict()
        key = (named['start'] or named['backquote']
               or named['refend'] or named['fnend'])
        before, inlines, rest, sysmessages = handlers[key](self, found,
                                                           lineno)
        pending.append(before)
        messages += sysmessages
        if inlines:
            # Flush the plain text collected so far, then add the nodes.
            nodelist += self.implicit_inline(''.join(pending), lineno)
            nodelist += inlines
            pending = []
    rest = ''.join(pending) + rest
    if rest:
        nodelist += self.implicit_inline(rest, lineno)
    return nodelist, messages
| |
# Building blocks for the inline-markup regular expressions.  They are
# plain strings; several of them are interpolated by name into larger
# templates via ``% locals()``.

# `openers` and `closers` are parallel strings: `quoted_start()` treats a
# start-string as quoted when the character before it and the character
# after it sit at the same index in `openers` and `closers` respectively.
openers = u'\'"([{<\u2018\u201c\xab\u00a1\u00bf' # see quoted_start below
closers = u'\'")]}>\u2019\u201d\xbb!?'
# Extra characters accepted in both the start-string prefix and the
# end-string suffix character classes (U+2010..U+2014 and U+00A0).
unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
# Lookbehind: start of text, or one of the listed characters, a unicode
# delimiter or an opener directly before the start-string.
start_string_prefix = (u'((?<=^)|(?<=[-/: \\n\u2019%s%s]))'
                       % (re.escape(unicode_delimiters),
                          re.escape(openers)))
# Lookahead: end of text, or one of the listed characters, a unicode
# delimiter or a closer directly after the end-string.
end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                     % (re.escape(unicode_delimiters),
                        re.escape(closers)))
# Negative lookbehind/lookahead assertions on the neighbouring character;
# the "escape" variant additionally rejects a preceding \x00.
non_whitespace_before = r'(?<![ \n])'
non_whitespace_escape_before = r'(?<![ \n\x00])'
non_whitespace_after = r'(?![ \n])'
# Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
# Valid URI characters (see RFC 2396 & RFC 2732);
# final \x00 allows backslash escapes in URIs:
uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
# Delimiter indicating the end of a URI (not part of the URI):
uri_end_delim = r"""[>]"""
# Last URI character; same as uric but no punctuation:
urilast = r"""[_~*/=+a-zA-Z0-9]"""
# End of a URI (either 'urilast' or 'uric followed by a
# uri_end_delim'):
uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
# Characters allowed in the name and host parts of an email address:
emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
# Template only: it is %-formatted with locals() and compiled with
# re.VERBOSE where it is used (see `patterns.email` and `patterns.uri`).
email_pattern = r"""
      %(emailc)s+(?:\.%(emailc)s+)*   # name
      (?<!\x00)@                      # at
      %(emailc)s+(?:\.%(emailc)s*)*   # host
      %(uri_end)s                     # final URI char
      """
# Nested definition passed to `build_regexp()` to produce
# `patterns.initial`.  Every tuple has the form
# (group name, prefix, suffix, parts); the entries of "parts" are joined
# into one or-group and may themselves be such tuples.
parts = ('initial_inline', start_string_prefix, '',
         [('start', '', non_whitespace_after,  # simple start-strings
           [r'\*\*',                # strong
            r'\*(?!\*)',            # emphasis but not strong
            r'``',                  # literal
            r'_`',                  # inline internal target
            r'\|(?!\|)']            # substitution reference
           ),
          ('whole', '', end_string_suffix, # whole constructs
           [# reference name & end-string
            r'(?P<refname>%s)(?P<refend>__?)' % simplename,
            ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
             [r'[0-9]+',               # manually numbered
              r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
              r'\*',                   # auto-symbol
              r'(?P<citationlabel>%s)' % simplename] # citation reference
             )
            ]
           ),
          ('backquote',             # interpreted text or phrase reference
           '(?P<role>(:%s:)?)' % simplename, # optional role
           non_whitespace_after,
           ['`(?!`)']               # but not literal
           )
          ]
         )
# Compiled regular expressions, grouped in a `Struct` for attribute access
# (``self.patterns.<name>``).  `initial` locates start-strings and whole
# constructs; most other entries are end-string patterns whose group 1 is
# the end-string itself.
patterns = Struct(
      # Built from the nested `parts` definition.
      initial=build_regexp(parts),
      # End-string patterns handed to `inline_obj()`:
      emphasis=re.compile(non_whitespace_escape_before
                          + r'(\*)' + end_string_suffix),
      strong=re.compile(non_whitespace_escape_before
                        + r'(\*\*)' + end_string_suffix),
      # Closing backquote with optional role suffix and/or reference
      # underscore(s); the named groups are read by
      # `interpreted_or_phrase_ref()`.
      interpreted_or_phrase_ref=re.compile(
          r"""
          %(non_whitespace_escape_before)s
          (
            `
            (?P<suffix>
              (?P<role>:%(simplename)s:)?
              (?P<refend>__?)?
            )
          )
          %(end_string_suffix)s
          """ % locals(), re.VERBOSE | re.UNICODE),
      # Trailing "<...>" in phrase-reference text; group 1 is the whole
      # bracketed part (with leading whitespace), group 2 its contents.
      embedded_uri=re.compile(
          r"""
          (
            (?:[ \n]+|^)            # spaces or beginning of line/string
            <                       # open bracket
            %(non_whitespace_after)s
            ([^<>\x00]+)            # anything but angle brackets & nulls
            %(non_whitespace_before)s
            >                       # close bracket w/o whitespace before
          )
          $                         # end of string
          """ % locals(), re.VERBOSE),
      literal=re.compile(non_whitespace_before + '(``)'
                         + end_string_suffix),
      target=re.compile(non_whitespace_escape_before
                        + r'(`)' + end_string_suffix),
      substitution_ref=re.compile(non_whitespace_escape_before
                                  + r'(\|_{0,2})'
                                  + end_string_suffix),
      # Anchored at the end; applied with ``match`` in `adjust_uri()`.
      email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
      # Standalone URI or email address; the first entry of
      # `implicit_dispatch`.
      uri=re.compile(
            (r"""
            %(start_string_prefix)s
            (?P<whole>
              (?P<absolute>           # absolute URI
                (?P<scheme>             # scheme (http, ftp, mailto)
                  [a-zA-Z][a-zA-Z0-9.+-]*
                )
                :
                (
                  (                       # either:
                    (//?)?                  # hierarchical URI
                    %(uric)s*               # URI characters
                    %(uri_end)s             # final URI char
                  )
                  (                       # optional query
                    \?%(uric)s*
                    %(uri_end)s
                  )?
                  (                       # optional fragment
                    \#%(uric)s*
                    %(uri_end)s
                  )?
                )
              )
            |                       # *OR*
              (?P<email>              # email address
                """ + email_pattern + r"""
              )
            )
            %(end_string_suffix)s
            """) % locals(), re.VERBOSE),
      # NOTE(review): the "." in "(.txt)?" below is unescaped and therefore
      # matches any character -- confirm whether "\." was intended.
      pep=re.compile(
            r"""
            %(start_string_prefix)s
            (
              (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
            |
              (PEP\s+(?P<pepnum2>\d+))      # reference by name
            )
            %(end_string_suffix)s""" % locals(), re.VERBOSE),
      rfc=re.compile(
            r"""
            %(start_string_prefix)s
            (RFC(-|\s+)?(?P<rfcnum>\d+))
            %(end_string_suffix)s""" % locals(), re.VERBOSE))
| |
def quoted_start(self, match):
    """
    Tell whether the inline markup start-string in `match` is 'quoted'.

    Returns 1 when the start-string is the last thing in the text, or when
    the characters directly before and after it are found at the same
    index in `self.openers` and `self.closers`; returns 0 otherwise
    (including when the start-string begins the text).
    """
    text = match.string
    begin, finish = match.span()
    if begin == 0:                  # start-string at beginning of text
        return 0
    if finish >= len(text):         # start-string at end of text
        return 1
    char_before = text[begin - 1]
    char_after = text[finish]
    try:
        is_pair = (self.openers.index(char_before)
                   == self.closers.index(char_after))
    except ValueError:              # one of them is no quote character
        return 0
    if is_pair:                     # quoted
        return 1
    return 0
| |
def inline_obj(self, match, lineno, end_pattern, nodeclass,
               restore_backslashes=0):
    """
    Generic handler for start-string/end-string inline constructs.

    Returns a 5-tuple: text before the construct, list of nodes, text
    still to be processed, list of system messages, and the matched
    end-string ('' if there is none).

    - A quoted start-string is passed through as ordinary text.
    - If `end_pattern` matches after at least one character, a `nodeclass`
      node is built from the enclosed text.
    - Otherwise a warning is issued and the start-string is wrapped in a
      "problematic" node.
    """
    source = match.string
    begin, finish = match.span('start')
    tail = source[finish:]
    if self.quoted_start(match):
        return (source[:finish], [], tail, [], '')
    closing = end_pattern.search(tail)
    if closing and closing.start(1):    # 1 or more chars
        content = unescape(closing.string[:closing.start(1)],
                           restore_backslashes)
        stop = finish + closing.end(1)
        raw = unescape(source[begin:stop], 1)
        return (source[:begin], [nodeclass(raw, content)],
                source[stop:], [], closing.group(1))
    msg = self.reporter.warning(
          'Inline %s start-string without end-string.'
          % nodeclass.__name__, line=lineno)
    # Text and raw source of the problematic node are both the bare
    # start-string.
    marker = unescape(source[begin:finish], 1)
    prb = self.problematic(marker, marker, msg)
    return source[:begin], [prb], tail, [msg], ''
| |
def problematic(self, text, rawsource, message):
    """
    Return a `nodes.problematic` node for `text` that refers to `message`.

    Both nodes get an ID from the document; the problematic node refers to
    the message via ``refid`` and is registered with the message as a
    back-reference.
    """
    document = self.document
    message_id = document.set_id(message, self.parent)
    node = nodes.problematic(rawsource, text, refid=message_id)
    message.add_backref(document.set_id(node))
    return node
| |
def emphasis(self, match, lineno):
    """Handle an emphasis start-string via `inline_obj()`; the matched
    end-string is not passed on."""
    end_pattern = self.patterns.emphasis
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.emphasis)
    return before, inlines, remaining, sysmessages
| |
def strong(self, match, lineno):
    """Handle a strong-emphasis start-string via `inline_obj()`; the
    matched end-string is not passed on."""
    end_pattern = self.patterns.strong
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.strong)
    return before, inlines, remaining, sysmessages
| |
def interpreted_or_phrase_ref(self, match, lineno):
    """
    Handle a single-backquote start-string: interpreted text (optionally
    with a role prefix or suffix) or a phrase reference.

    Returns a 4-tuple: text before the construct, list of nodes, text
    still to be processed, and list of system messages.
    """
    closing_pattern = self.patterns.interpreted_or_phrase_ref
    source = match.string
    begin, finish = match.span('backquote')
    role_begin = match.start('role')
    role = match.group('role')
    position = ''
    if role:
        role = role[1:-1]           # drop the enclosing colons
        position = 'prefix'
    elif self.quoted_start(match):
        return (source[:finish], [], source[finish:], [])

    closing = closing_pattern.search(source[finish:])
    if not (closing and closing.start(1)):  # no end-string, or no content
        msg = self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno)
        marker = unescape(source[begin:finish], 1)
        prb = self.problematic(marker, marker, msg)
        return source[:begin], [prb], source[finish:], [msg]

    stop = finish + closing.end()
    before_role = source[:role_begin]
    after = source[stop:]

    def flag_whole_span(msg):
        # Mark everything from the role prefix to the end-string as
        # problematic.
        whole = unescape(source[role_begin:stop], 1)
        return before_role, [self.problematic(whole, whole, msg)], after, [msg]

    if closing.group('role'):
        if role:
            return flag_whole_span(self.reporter.warning(
                'Multiple roles in interpreted text (both '
                'prefix and suffix present; only one allowed).',
                line=lineno))
        role = closing.group('suffix')[1:-1]
        position = 'suffix'
    escaped = closing.string[:closing.start(1)]
    rawsource = unescape(source[begin:stop], 1)
    if rawsource[-1:] != '_':
        # Interpreted text: the raw source includes any role prefix.
        rawsource = unescape(source[role_begin:stop], 1)
        nodelist, messages = self.interpreted(rawsource, escaped, role,
                                              lineno)
        return (before_role, nodelist, after, messages)
    if role:
        return flag_whole_span(self.reporter.warning(
            'Mismatch: both interpreted text role %s and '
            'reference suffix.' % position, line=lineno))
    return self.phrase_ref(source[:begin], after,
                           rawsource, escaped, unescape(escaped))
| |
def phrase_ref(self, before, after, rawsource, escaped, text):
    """
    Build the node(s) for a phrase reference, optionally with an embedded
    URI (``text <uri>``).

    Returns a 4-tuple: `before`, list of nodes (the reference, plus a
    target for a named reference with embedded URI), `after`, and an empty
    message list.  Raises `ApplicationError` if the embedded URI comes out
    empty.
    """
    target = None
    found = self.patterns.embedded_uri.search(escaped)
    if found:
        text = unescape(escaped[:found.start(0)])
        uri_text = found.group(2)
        # Whitespace inside the brackets is dropped from the URI.
        uri = self.adjust_uri(''.join(uri_text.split()))
        if not uri:
            raise ApplicationError('problem with URI: %r' % uri_text)
        target = nodes.target(found.group(1), refuri=uri)
        if not text:
            text = uri
    refname = normalize_name(text)
    reference = nodes.reference(rawsource, text,
                                name=whitespace_normalize_name(text))
    node_list = [reference]
    anonymous = rawsource[-2:] == '__'
    if target:
        reference['refuri'] = uri
        if not anonymous:
            target['names'].append(refname)
            self.document.note_explicit_target(target, self.parent)
            node_list.append(target)
    elif anonymous:
        reference['anonymous'] = 1
    else:
        reference['refname'] = refname
        self.document.note_refname(reference)
    return before, node_list, after, []
| |
def adjust_uri(self, uri):
    """Return `uri` with ``mailto:`` prepended if it matches
    `self.patterns.email`; otherwise return it unchanged."""
    if self.patterns.email.match(uri):
        return 'mailto:' + uri
    return uri
| |
def interpreted(self, rawsource, text, role, lineno):
    """
    Look up `role` with `roles.role()` and apply the role function found.

    Returns two lists: nodes and system messages.  If no role function is
    found, an error is reported and `rawsource` is wrapped in a
    "problematic" node.
    """
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if not role_fn:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
    role_nodes, role_messages = role_fn(role, rawsource, text, lineno, self)
    return role_nodes, messages + role_messages
| |
def literal(self, match, lineno):
    """Handle an inline-literal start-string via `inline_obj()`, with
    backslashes restored in the node text; the matched end-string is not
    passed on."""
    end_pattern = self.patterns.literal
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.literal, restore_backslashes=1)
    return before, inlines, remaining, sysmessages
| |
def inline_internal_target(self, match, lineno):
    """
    Handle an inline internal target start-string via `inline_obj()`.

    When a target node results, its normalized text is added to its names
    and it is registered with the document as an explicit target.
    """
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    first = inlines[0] if inlines else None
    if isinstance(first, nodes.target):
        assert len(inlines) == 1
        first['names'].append(normalize_name(first.astext()))
        self.document.note_explicit_target(first, self.parent)
    return before, inlines, remaining, sysmessages
| |
def substitution_reference(self, match, lineno):
    """
    Handle a substitution reference start-string via `inline_obj()`.

    A resulting substitution reference is registered with the document.
    If the end-string ends with underscore(s), the node is wrapped in a
    reference node: anonymous for ``__``, named otherwise.
    """
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    unchanged = (before, inlines, remaining, sysmessages)
    if len(inlines) != 1:
        return unchanged
    subref = inlines[0]
    if not isinstance(subref, nodes.substitution_reference):
        return unchanged
    label = subref.astext()
    self.document.note_substitution_ref(subref, label)
    if endstring[-1:] != '_':
        return unchanged
    wrapper = nodes.reference('|%s%s' % (label, endstring), '')
    if endstring[-2:] == '__':
        wrapper['anonymous'] = 1
    else:
        wrapper['refname'] = normalize_name(label)
        self.document.note_refname(wrapper)
    wrapper += subref
    return before, [wrapper], remaining, sysmessages
| |
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    Returns a 4-tuple: text before the reference, a one-element node
    list, text still to be processed, and an empty message list.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    source = match.string
    whole_begin, whole_end = match.span('whole')
    before = source[:whole_begin]
    remaining = source[whole_end:]
    rawsource = '[%s]_' % label
    if match.group('citationlabel'):
        refnode = nodes.citation_reference(rawsource, refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
        return (before, [refnode], remaining, [])
    refnode = nodes.footnote_reference(rawsource)
    if refname[0] == '#':           # auto-numbered, label optional
        refname = refname[1:]
        refnode['auto'] = 1
        self.document.note_autofootnote_ref(refnode)
    elif refname == '*':            # auto-symbol
        refname = ''
        refnode['auto'] = '*'
        self.document.note_symbol_footnote_ref(refnode)
    else:                           # manually numbered
        refnode += nodes.Text(label)
    if refname:
        refnode['refname'] = refname
        self.document.note_footnote_ref(refnode)
    if utils.get_trim_footnote_ref_space(self.document.settings):
        before = before.rstrip()
    return (before, [refnode], remaining, [])
| |
def reference(self, match, lineno, anonymous=None):
    """
    Build a `nodes.reference` from a reference match.

    With a true `anonymous` the node is flagged anonymous; otherwise its
    normalized name is stored as ``refname`` and the node is noted with
    the document.

    Return a tuple: (text before the match, [reference node], text after
    the match, empty list of system messages).
    """
    referencename = match.group('refname')
    refname = normalize_name(referencename)
    rawsource = referencename + match.group('refend')
    referencenode = nodes.reference(
        rawsource, referencename,
        name=whitespace_normalize_name(referencename))
    if anonymous:
        referencenode['anonymous'] = 1
    else:
        referencenode['refname'] = refname
        self.document.note_refname(referencenode)
    string = match.string
    before = string[:match.start('whole')]
    remaining = string[match.end('whole'):]
    return (before, [referencenode], remaining, [])
| |
def anonymous_reference(self, match, lineno):
    """Handle an anonymous reference via `self.reference()`."""
    result = self.reference(match, lineno, anonymous=1)
    return result
| |
def standalone_uri(self, match, lineno):
    """
    Turn a standalone URI or email address match into a reference node.

    Raise `MarkupMismatch` if the match has a scheme that is not listed
    in `urischemes.schemes`.  Email matches get a "mailto:" prefix in the
    ``refuri``.
    """
    scheme = match.group('scheme')
    if scheme and scheme.lower() not in urischemes.schemes:
        # not a valid scheme
        raise MarkupMismatch
    addscheme = ''
    if match.group('email'):
        addscheme = 'mailto:'
    text = match.group('whole')
    unescaped = unescape(text, 0)
    node = nodes.reference(unescape(text, 1), unescaped,
                           refuri=addscheme + unescaped)
    return [node]
| |
def pep_reference(self, match, lineno):
    """
    Turn a PEP reference match into a reference node.

    The PEP number comes from group ``pepnum1`` for text starting with
    "pep-" and from ``pepnum2`` for text starting with "PEP"; any other
    text raises `MarkupMismatch`.  The target URI is built from the
    ``pep_base_url`` and ``pep_file_url_template`` settings.
    """
    text = match.group(0)
    if text.startswith('pep-'):
        number_group = 'pepnum1'
    elif text.startswith('PEP'):
        number_group = 'pepnum2'
    else:
        raise MarkupMismatch
    pepnum = int(match.group(number_group))
    settings = self.document.settings
    ref = settings.pep_base_url + settings.pep_file_url_template % pepnum
    unescaped = unescape(text, 0)
    node = nodes.reference(unescape(text, 1), unescaped, refuri=ref)
    return [node]
| |
# File name template appended to the ``rfc_base_url`` setting.
rfc_url = 'rfc%d.html'

def rfc_reference(self, match, lineno):
    """
    Turn an RFC reference match into a reference node.

    Raise `MarkupMismatch` unless the matched text starts with "RFC".
    The RFC number is read from the ``rfcnum`` group.
    """
    text = match.group(0)
    if not text.startswith('RFC'):
        raise MarkupMismatch
    rfcnum = int(match.group('rfcnum'))
    ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    unescaped = unescape(text, 0)
    node = nodes.reference(unescape(text, 1), unescaped, refuri=ref)
    return [node]
| |
def implicit_inline(self, text, lineno):
    """
    Try every (pattern, method) pair of `self.implicit_dispatch` on
    `text`.  For the first pattern that matches and whose method does not
    raise `MarkupMismatch`, return the nodes for the text before the
    match, the nodes produced by the method, and the nodes for the text
    after the match (both sides are processed recursively).  If nothing
    applies, return the unescaped text as a single `nodes.Text`.
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
        if not match:
            continue
        try:
            # Both sides of the match must be examined as well; other
            # patterns may apply there.
            leading = self.implicit_inline(text[:match.start()], lineno)
            matched = method(match, lineno)
            trailing = self.implicit_inline(text[match.end():], lineno)
        except MarkupMismatch:
            continue
        return leading + matched + trailing
    return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
| |
# Maps an inline markup start-string to the handler for that construct.
# The values are the plain functions bound to these names in the
# surrounding class body.
dispatch = {'*': emphasis,
            '**': strong,
            '`': interpreted_or_phrase_ref,
            '``': literal,
            '_`': inline_internal_target,
            ']_': footnote_reference,
            '|': substitution_reference,
            '_': reference,
            '__': anonymous_reference}
| |
| |
| def _loweralpha_to_int(s, _zero=(ord('a')-1)): |
| return ord(s) - _zero |
| |
| def _upperalpha_to_int(s, _zero=(ord('A')-1)): |
| return ord(s) - _zero |
| |
def _lowerroman_to_int(s):
    """Convert `s` to upper case and pass it to `roman.fromRoman()`."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
| |
| |
| class Body(RSTState): |
| |
| """ |
| Generic classifier of the first line of a block. |
| """ |
| |
double_width_pad_char = tableparser.TableParser.double_width_pad_char
"""Padding character for East Asian double-width text."""

enum = Struct()
"""Enumerated list parsing information."""

# Per-format delimiters.  `start` and `end` are the slice bounds that cut
# the bare enumerator text out of a matched marker (see
# `parse_enumerator()`).
enum.formatinfo = {
      'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
      'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
      'period': Struct(prefix='', suffix='.', start=0, end=-1)}
enum.formats = enum.formatinfo.keys()
# `parse_enumerator()` falls back to trying these in list order.
enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                  'lowerroman', 'upperroman'] # ORDERED!
# Regular expression fragment recognizing each sequence's enumerator text.
enum.sequencepats = {'arabic': '[0-9]+',
                     'loweralpha': '[a-z]',
                     'upperalpha': '[A-Z]',
                     'lowerroman': '[ivxlcdm]+',
                     'upperroman': '[IVXLCDM]+',}
# Callable turning enumerator text into its integer ordinal, per sequence.
enum.converters = {'arabic': int,
                   'loweralpha': _loweralpha_to_int,
                   'upperalpha': _upperalpha_to_int,
                   'lowerroman': _lowerroman_to_int,
                   'upperroman': roman.fromRoman}

# Compiled form of each sequence pattern, anchored at the end with "$".
enum.sequenceregexps = {}
for sequence in enum.sequences:
    enum.sequenceregexps[sequence] = re.compile(
          enum.sequencepats[sequence] + '$')
| |
# Used by `isolate_grid_table()` for the top border and to locate the
# bottom border.
grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
"""Matches the top (& bottom) of a full table)."""

# At least two runs of "=" separated by spaces.
simple_table_top_pat = re.compile('=+( +=+)+ *$')
"""Matches the top of a simple table."""

# Used by `isolate_simple_table()` to find later borders of the table.
simple_table_border_pat = re.compile('=+[ =]*$')
"""Matches the bottom & header bottom of a simple table."""
| |
pats = {}
"""Fragments of patterns used by transitions."""

pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
pats['alpha'] = '[a-zA-Z]'
pats['alphanum'] = '[a-zA-Z0-9]'
pats['alphanumplus'] = '[a-zA-Z0-9_-]'
# Any enumerator text: one of the sequence patterns, or "#".
pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                '|%(upperroman)s|#)' % enum.sequencepats)
pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
# @@@ Loosen up the pattern?  Allow Unicode?
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

# One named group per enumerator format (prefix + enumerator + suffix);
# `parse_enumerator()` checks these groups to see which format matched.
for format in enum.formats:
    pats[format] = '(?P<%s>%s%s%s)' % (
          format, re.escape(enum.formatinfo[format].prefix),
          pats['enum'], re.escape(enum.formatinfo[format].suffix))
| |
# Transition name -> pattern (source string or precompiled regexp).
patterns = {
      'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
      'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
      'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
      'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
      'doctest': r'>>>( +|$)',
      'line_block': r'\|( +|$)',
      'grid_table_top': grid_table_top_pat,
      'simple_table_top': simple_table_top_pat,
      'explicit_markup': r'\.\.( +|$)',
      'anonymous': r'__( +|$)',
      'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
      'text': r''}
# Names of the transitions of this state; 'text' has an empty pattern
# and comes last.
initial_transitions = (
      'bullet',
      'enumerator',
      'field_marker',
      'option_marker',
      'doctest',
      'line_block',
      'grid_table_top',
      'simple_table_top',
      'explicit_markup',
      'anonymous',
      'line',
      'text')
| |
def indent(self, match, context, next_state):
    """
    Block quote.

    Fetch the indented block from the state machine, parse it with
    `self.block_quote()` and add the result to the parent node.  A
    warning is added if the block was not followed by a blank line.
    """
    (indented, indent,
     line_offset, blank_finish) = self.state_machine.get_indented()
    self.parent += self.block_quote(indented, line_offset)
    if not blank_finish:
        warning = self.unindent_warning('Block quote')
        self.parent += warning
    return context, next_state, []
| |
def block_quote(self, indented, line_offset):
    """
    Parse `indented` into one or more block quotes.

    The lines are repeatedly split at an attribution (see
    `split_attribution()`); each part before an attribution becomes its
    own `nodes.block_quote`.

    Return a list holding the block quote nodes, interleaved with any
    system messages from parsing the attributions.
    """
    elements = []
    while indented:
        (blockquote_lines,
         attribution_lines,
         attribution_offset,
         indented,
         new_line_offset) = self.split_attribution(indented, line_offset)
        blockquote = nodes.block_quote()
        self.nested_parse(blockquote_lines, line_offset, blockquote)
        elements.append(blockquote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, attribution_offset)
            blockquote += attribution
            elements += messages
        line_offset = new_line_offset
        # skip blank lines in front of the next block quote, keeping
        # `line_offset` in step with the lines dropped
        while indented and not indented[0]:
            indented = indented[1:]
            line_offset += 1
    return elements
| |
# U+2014 is an em-dash:
attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])')

def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, attribution lines,
    attribution offset, remaining indented lines, line offset of the
    remaining lines).  If no attribution is found, the tuple is
    ``(indented, None, None, None, None)``.
    """
    blank = None                # index of the most recent blank line
    nonblank_seen = False
    for i in range(len(indented)):
        line = indented[i].rstrip()
        if line:
            if nonblank_seen and blank == i - 1: # last line blank
                match = self.attribution_pattern.match(line)
                if match:
                    attribution_end, indent = self.check_attribution(
                        indented, i)
                    if attribution_end:
                        a_lines = indented[i:attribution_end]
                        # strip the dash marker from the first line and
                        # the common indent from the following lines
                        a_lines.trim_left(match.end(), end=1)
                        a_lines.trim_left(indent, start=1)
                        return (indented[:i], a_lines,
                                i, indented[attribution_end:],
                                line_offset + attribution_end)
            nonblank_seen = True
        else:
            blank = i
    else:
        # loop finished without finding an attribution
        return (indented, None, None, None, None)
| |
def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.

    `indented` is the sequence of lines, `attribution_start` the index
    of the first attribution line.  All lines following the first one,
    up to the next blank line or the end of `indented`, must share the
    same indentation.

    Return the index past the end of the attribution, and the indent
    (0 for a single-line attribution).  Return ``(None, None)`` if the
    continuation lines are not consistently indented.
    """
    indent = None
    end = len(indented)
    for i in range(attribution_start + 1, end):
        line = indented[i].rstrip()
        if not line:
            # a blank line terminates the attribution
            return i, (indent or 0)
        line_indent = len(line) - len(line.lstrip())
        if indent is None:
            indent = line_indent
        elif line_indent != indent:
            return None, None       # bad shape; not an attribution
    # The attribution runs to the end of the input.  Returning
    # `len(indented)` also covers an attribution on the very last line
    # (empty loop), which previously yielded `len(indented) + 1`.
    return end, (indent or 0)
| |
def parse_attribution(self, indented, line_offset):
    """
    Build a `nodes.attribution` from the attribution lines.

    The lines are joined with newlines and parsed as inline text.
    Return the node and the list of system messages from inline parsing.
    """
    lineno = self.state_machine.abs_line_number() + line_offset
    text = '\n'.join(indented).rstrip()
    textnodes, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *textnodes)
    attribution.line = lineno
    # report with source and source-line results in
    # ``IndexError: list index out of range``
    # node.source, node.line = self.state_machine.get_source_and_line(lineno)
    return attribution, messages
| |
def bullet(self, match, context, next_state):
    """
    Bullet list item.

    Create a bullet list in the parent, parse the first item here, and
    let a nested 'BulletList' parse handle the lines that follow.
    """
    bulletlist = nodes.bullet_list()
    self.parent += bulletlist
    bulletlist['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bulletlist += first_item
    machine = self.state_machine
    following = machine.line_offset + 1     # next line
    new_line_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        warning = self.unindent_warning('Bullet list')
        self.parent += warning
    return [], next_state, []
| |
def list_item(self, indent):
    """
    Parse one list item whose marker ends at column `indent`.

    Return the `nodes.list_item` and the blank-finish flag reported by
    the state machine.
    """
    machine = self.state_machine
    if not machine.line[indent:]:
        # nothing after the marker on the current line
        (indented, indent,
         line_offset, blank_finish) = machine.get_first_known_indented(
            indent)
    else:
        (indented, line_offset,
         blank_finish) = machine.get_known_indented(indent)
    item = nodes.list_item('\n'.join(indented))
    if indented:
        self.nested_parse(indented, input_offset=line_offset, node=item)
    return item, blank_finish
| |
def enumerator(self, match, context, next_state):
    """
    Enumerated List Item

    Raise `statemachine.TransitionCorrection` for 'text' when
    `is_enumerated_list_item()` rejects the line.  Otherwise create the
    enumerated list, parse its first item, and hand the following lines
    to a nested 'EnumeratedList' parse.
    """
    format, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, format):
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    if sequence == '#':
        # auto-enumerated lists are recorded as arabic
        enumlist['enumtype'] = 'arabic'
    else:
        enumlist['enumtype'] = sequence
    enumlist['prefix'] = self.enum.formatinfo[format].prefix
    enumlist['suffix'] = self.enum.formatinfo[format].suffix
    if ordinal != 1:
        # record the start value and leave an info-level system message
        enumlist['start'] = ordinal
        src, srcline = self.state_machine.get_source_and_line()
        msg = self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal), source=src, line=srcline)
        self.parent += msg
    listitem, blank_finish = self.list_item(match.end())
    enumlist += listitem
    offset = self.state_machine.line_offset + 1   # next line
    newline_offset, blank_finish = self.nested_list_parse(
          self.state_machine.input_lines[offset:],
          input_offset=self.state_machine.abs_line_offset() + 1,
          node=enumlist, initial_state='EnumeratedList',
          blank_finish=blank_finish,
          extra_settings={'lastordinal': ordinal,
                          'format': format,
                          'auto': sequence == '#'})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []
| |
def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    :Exception: `ParserError` if no format group matched, if
        `expected_sequence` is not a known sequence, or if the text fits
        no sequence.

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.
    """
    groupdict = match.groupdict()
    sequence = ''
    for format in self.enum.formats:
        if groupdict[format]:       # was this the format matched?
            break                   # yes; keep `format`
    else:                           # shouldn't happen
        raise ParserError('enumerator format not matched')
    # cut the prefix/suffix characters off the matched marker
    text = groupdict[format][self.enum.formatinfo[format].start
                             :self.enum.formatinfo[format].end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            if self.enum.sequenceregexps[expected_sequence].match(text):
                sequence = expected_sequence
        except KeyError:            # shouldn't happen
            # Report the sequence that was asked for; `sequence` is
            # still the empty string at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        # fall back to the first sequence (in declared order) that fits
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:                       # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = self.enum.converters[sequence](text)
        except roman.InvalidRomanNumeralError:
            ordinal = None
    return format, sequence, text, ordinal
| |
def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true if the ordinal is valid and the second line is blank,
    indented, or starts with the next enumerator or an auto-enumerator.
    Also return true if there is no second line (end of input).
    Return ``None`` otherwise.
    """
    if ordinal is None:
        return None
    # Peek at the following line; in either case step back again so the
    # state machine position is unchanged.
    try:
        next_line = self.state_machine.next_line()
    except EOFError:              # end of input lines
        self.state_machine.previous_line()
        return 1
    else:
        self.state_machine.previous_line()
    if not next_line[:1].strip():   # blank or indented
        return 1
    result = self.make_enumerator(ordinal + 1, sequence, format)
    if result:
        next_enumerator, auto_enumerator = result
        try:
            if ( next_line.startswith(next_enumerator) or
                 next_line.startswith(auto_enumerator) ):
                return 1
        except TypeError:
            # NOTE(review): a TypeError from startswith() is deliberately
            # ignored here; the reason is not visible in this block.
            pass
    return None
| |
def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Both markers include the format's prefix and suffix and a trailing
    space.

    Return ``None`` for invalid (out of range) ordinals.

    :Exception: `ParserError` for an unknown `sequence`.
    """ #"
    if sequence == '#':
        enumerator = '#'
    elif sequence == 'arabic':
        enumerator = str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            # lowercase letter; case is adjusted below
            enumerator = chr(ordinal + ord('a') - 1)
        elif sequence.endswith('roman'):
            try:
                enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
                return None
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        # apply the case implied by the sequence name
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    formatinfo = self.enum.formatinfo[format]
    next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
                       + ' ')
    auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
    return next_enumerator, auto_enumerator
| |
def field_marker(self, match, context, next_state):
    """
    Field list item.

    Create a field list in the parent, parse the first field here, and
    let a nested 'FieldList' parse handle the lines that follow.
    """
    field_list = nodes.field_list()
    self.parent += field_list
    first_field, blank_finish = self.field(match)
    field_list += first_field
    machine = self.state_machine
    following = machine.line_offset + 1     # next line
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        warning = self.unindent_warning('Field list')
        self.parent += warning
    return [], next_state, []
| |
def field(self, match):
    """
    Parse one field (name and body) of a field list.

    The field name is parsed as inline text; system messages from that
    step are placed at the start of the field body.  Return the
    `nodes.field` and the blank-finish flag.
    """
    name = self.parse_field_marker(match)
    machine = self.state_machine
    src, srcline = machine.get_source_and_line()
    lineno = machine.abs_line_number()
    (indented, indent,
     line_offset, blank_finish) = machine.get_first_known_indented(
        match.end())
    field_node = nodes.field()
    field_node.source = src
    field_node.line = srcline
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_name = nodes.field_name(name, '', *name_nodes)
    field_node += field_name
    field_body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += field_body
    if indented:
        self.parse_field_body(indented, line_offset, field_body)
    return field_node, blank_finish
| |
def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    after_colon = match.group()[1:]     # drop the leading ':'
    # cut at the last ':' (drops it and whatever follows)
    last_colon = after_colon.rfind(':')
    return after_colon[:last_colon]
| |
def parse_field_body(self, indented, offset, node):
    """Parse the field body lines `indented` into `node`."""
    self.nested_parse(indented,
                      input_offset=offset,
                      node=node)
| |
def option_marker(self, match, context, next_state):
    """
    Option list item.

    Parse the first item with `option_list_item()` and hand the
    following lines to a nested 'OptionList' parse.  If the marker turns
    out to be invalid, report an error and treat the indented text as a
    block quote instead.
    """
    optionlist = nodes.option_list()
    try:
        listitem, blank_finish = self.option_list_item(match)
    except MarkupError, error:
        # This shouldn't happen; pattern won't match.
        src, srcline = self.state_machine.get_source_and_line()
        msg = self.reporter.error('Invalid option list marker: %s' %
                                  str(error), source=src, line=srcline)
        self.parent += msg
        indented, indent, line_offset, blank_finish = \
                  self.state_machine.get_first_known_indented(match.end())
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    # the list is attached to the parent only once its first item parsed
    self.parent += optionlist
    optionlist += listitem
    offset = self.state_machine.line_offset + 1   # next line
    newline_offset, blank_finish = self.nested_list_parse(
          self.state_machine.input_lines[offset:],
          input_offset=self.state_machine.abs_line_offset() + 1,
          node=optionlist, initial_state='OptionList',
          blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []
| |
def option_list_item(self, match):
    """
    Parse one option list item (option group plus description).

    If no description text follows the marker, go back to the line the
    item started on and raise `statemachine.TransitionCorrection` for
    'text'.  Return the `nodes.option_list_item` and the blank-finish
    flag.
    """
    machine = self.state_machine
    start_offset = machine.abs_line_offset()
    options = self.parse_option_marker(match)
    (indented, indent,
     line_offset, blank_finish) = machine.get_first_known_indented(
        match.end())
    if not indented:                    # not an option list item
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    option_group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item('', option_group, description)
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=description)
    return item, blank_finish
| |
def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    The matched text is split on ", " into option strings; each one is
    reduced to an option name and at most one argument.

    :Exception: `MarkupError` for invalid option markers.
    """
    optlist = []
    optionstrings = match.group().rstrip().split(', ')
    for optionstring in optionstrings:
        tokens = optionstring.split()
        delimiter = ' '             # default: "-o value" form
        firstopt = tokens[0].split('=')
        if len(firstopt) > 1:
            # "--opt=value" form
            tokens[:1] = firstopt
            delimiter = '='
        elif (len(tokens[0]) > 2
              and ((tokens[0].startswith('-')
                    and not tokens[0].startswith('--'))
                   or tokens[0].startswith('+'))):
            # "-ovalue" form
            tokens[:1] = [tokens[0][:2], tokens[0][2:]]
            delimiter = ''
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if 0 < len(tokens) <= 2:
            option = nodes.option(optionstring)
            option += nodes.option_string(tokens[0], tokens[0])
            if len(tokens) > 1:
                option += nodes.option_argument(tokens[1], tokens[1],
                                                delimiter=delimiter)
            optlist.append(option)
        else:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring))
    return optlist
| |
def doctest(self, match, context, next_state):
    """Add the current text block to the parent as a doctest block."""
    block_lines = self.state_machine.get_text_block()
    data = '\n'.join(block_lines)
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []
| |
def line_block(self, match, context, next_state):
    """
    First line of a line block.

    Parse the first line here; unless it was followed by a blank line,
    hand the following lines to a nested 'LineBlock' parse.  Finally
    group the collected lines by indentation.
    """
    block = nodes.line_block()
    self.parent += block
    lineno = self.state_machine.abs_line_number()
    line, messages, blank_finish = self.line_block_line(match, lineno)
    block += line
    self.parent += messages
    if not blank_finish:
        offset = self.state_machine.line_offset + 1   # next line
        new_line_offset, blank_finish = self.nested_list_parse(
              self.state_machine.input_lines[offset:],
              input_offset=self.state_machine.abs_line_offset() + 1,
              node=block, initial_state='LineBlock',
              blank_finish=0)
        self.goto_line(new_line_offset)
    if not blank_finish:
        src, srcline = self.state_machine.get_source_and_line()
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            source=src, line=srcline+1)
    if len(block):
        # the first line needs a definite indent, because the lines
        # after it inherit missing indents from their predecessor
        if block[0].indent is None:
            block[0].indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []
| |
def line_block_line(self, match, lineno):
    """
    Return one line element of a line_block.

    The result is a tuple: (the `nodes.line`, system messages from inline
    parsing, blank-finish flag).  The line's ``indent`` is only set here
    when the source line is more than a bare "|".
    """
    (indented, indent,
     line_offset, blank_finish) = self.state_machine.get_first_known_indented(
        match.end(), until_blank=1)
    text = u'\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    is_empty = match.string.rstrip() == '|'
    if not is_empty:
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
| |
def nest_line_block_lines(self, block):
    """
    Give every line without an indent the indent of the line before it,
    then nest the lines of `block` with `nest_line_block_segment()`.
    """
    for position in range(1, len(block)):
        current = block[position]
        if current.indent is None:
            current.indent = block[position - 1].indent
    self.nest_line_block_segment(block)
| |
def nest_line_block_segment(self, block):
    """
    Regroup the children of `block` in place by indentation.

    Items at the smallest indent stay direct children; each run of
    consecutive items with a larger indent is moved into a new nested
    `nodes.line_block`, which is processed recursively.
    """
    indents = [item.indent for item in block]
    least = min(indents)
    new_items = []
    new_block = nodes.line_block()  # collects the current deeper run
    for item in block:
        if item.indent > least:
            new_block.append(item)
        else:
            # an item at the base indent closes any pending deeper run
            if len(new_block):
                self.nest_line_block_segment(new_block)
                new_items.append(new_block)
                new_block = nodes.line_block()
            new_items.append(item)
    # flush a deeper run that reaches the end of the block
    if len(new_block):
        self.nest_line_block_segment(new_block)
        new_items.append(new_block)
    block[:] = new_items
| |
def grid_table_top(self, match, context, next_state):
    """Top border of a full table."""
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)
| |
def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)
| |
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """
    Top border of a generic table.

    Parse the table with `self.table()` and add the resulting nodes to
    the parent; add a warning if no blank line follows the table.
    """
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    src, srcline = self.state_machine.get_source_and_line()
    warning = self.reporter.warning(
        'Blank line required after table.',
        source=src, line=srcline+1)
    self.parent += warning
    return [], next_state, []
| |
def table(self, isolate_function, parser_class):
    """
    Parse a table.

    `isolate_function` returns the table's lines, system messages, and
    the blank-finish flag; `parser_class` is instantiated to parse the
    lines.

    Return a list of nodes (the table or an error, plus messages) and
    the blank-finish flag.
    """
    block, messages, blank_finish = isolate_function()
    if block:
        try:
            parser = parser_class()
            tabledata = parser.parse(block)
            # absolute line number of the table's first line
            tableline = (self.state_machine.abs_line_number() - len(block)
                         + 1)
            table = self.build_table(tabledata, tableline)
            nodelist = [table] + messages
        except tableparser.TableMarkupError, detail:
            nodelist = self.malformed_table(
                block, ' '.join(detail.args)) + messages
    else:
        # nothing could be isolated; only the messages are returned
        nodelist = messages
    return nodelist, blank_finish
| |
def isolate_grid_table(self):
    """
    Extract the lines of a grid table from the state machine.

    Return a tuple: (table lines, system messages, blank-finish flag).
    The table lines are an empty list if the block is malformed.
    """
    messages = []
    blank_finish = 1
    try:
        block = self.state_machine.get_text_block(flush_left=1)
    except statemachine.UnexpectedIndentationError, instance:
        # continue with the lines carried by the exception
        block, src, srcline = instance.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=src, line=srcline))
        blank_finish = 0
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|': # check left edge
            # the table ends before this line; give the rest back
            blank_finish = 0
            self.state_machine.previous_line(len(block) - i)
            del block[i:]
            break
    if not self.grid_table_top_pat.match(block[-1]): # find bottom
        blank_finish = 0
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                self.state_machine.previous_line(len(block) - i + 1)
                del block[i+1:]
                break
        else:
            # no border line found anywhere below the top
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    for i in range(len(block)):     # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish
| |
def isolate_simple_table(self):
    """
    Extract the lines of a simple table from the state machine.

    Starting at the current line (the top border), scan forward for
    border lines.  The table ends at the second border found, or at a
    border that is the last input line or is followed by a blank line.

    Return a tuple: (table lines, system messages, blank-finish flag).
    The table lines are an empty list if the table is malformed.
    """
    start = self.state_machine.line_offset
    lines = self.state_machine.input_lines
    limit = len(lines) - 1
    toplen = len(lines[start].strip())
    pattern_match = self.simple_table_border_pat.match
    found = 0                   # number of borders seen after the top
    found_at = None             # index of the most recent border
    i = start + 1
    while i <= limit:
        line = lines[i]
        match = pattern_match(line)
        if match:
            if len(line.strip()) != toplen:
                self.state_machine.next_line(i - start)
                messages = self.malformed_table(
                    lines[start:i+1], 'Bottom/header table border does '
                    'not match top border.')
                return [], messages, i == limit or not lines[i+1].strip()
            found += 1
            found_at = i
            if found == 2 or i == limit or not lines[i+1].strip():
                end = i
                break
        i += 1
    else:                           # reached end of input_lines
        if found:
            extra = ' or no blank line after table bottom'
            self.state_machine.next_line(found_at - start)
            block = lines[start:found_at+1]
        else:
            extra = ''
            self.state_machine.next_line(i - start - 1)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    self.state_machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()
| |
def malformed_table(self, block, detail=''):
    """
    Return a one-element list with a "Malformed table." error.

    The error carries the table text as a literal block; a non-empty
    `detail` is appended to the message on a new line.  Double-width
    padding characters are removed from `block` first.
    """
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    startline = self.state_machine.abs_line_number() - len(block) + 1
    src, srcline = self.state_machine.get_source_and_line(startline)
    message = 'Malformed table.'
    if detail:
        message = message + '\n' + detail
    literal = nodes.literal_block(data, data)
    return [self.reporter.error(message, literal,
                                source=src, line=srcline)]
| |
def build_table(self, tabledata, tableline, stub_columns=0):
    """
    Build a `nodes.table` from parsed table data.

    `tabledata` is a triple (column widths, head rows, body rows).  The
    first `stub_columns` column specifications are marked as stubs.  A
    ``thead`` is only created when there are head rows.
    """
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    tgroup = nodes.tgroup(cols=len(colwidths))
    table += tgroup
    stubs_left = stub_columns
    for width in colwidths:
        colspec = nodes.colspec(colwidth=width)
        if stubs_left:
            colspec.attributes['stub'] = 1
            stubs_left -= 1
        tgroup += colspec
    if headrows:
        thead = nodes.thead()
        tgroup += thead
        for rowdata in headrows:
            thead += self.build_table_row(rowdata, tableline)
    tbody = nodes.tbody()
    tgroup += tbody
    for rowdata in bodyrows:
        tbody += self.build_table_row(rowdata, tableline)
    return table
| |
def build_table_row(self, rowdata, tableline):
    """
    Build a `nodes.row` from one row of parsed table data.

    ``None`` cells are skipped.  Every other cell is a 4-tuple (morerows,
    morecols, offset, cellblock) and becomes a `nodes.entry`; non-empty
    cell text is parsed into the entry.
    """
    row = nodes.row()
    for cell in rowdata:
        if cell is None:
            continue
        morerows, morecols, offset, cellblock = cell
        attributes = {}
        for key, span in (('morerows', morerows), ('morecols', morecols)):
            if span:
                attributes[key] = span
        entry = nodes.entry(**attributes)
        row += entry
        if ''.join(cellblock):
            self.nested_parse(cellblock, input_offset=tableline+offset,
                              node=entry)
    return row
| |
| |
explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# All three patterns are written in verbose mode and interpolate
# fragments defined on `Inliner` (via ``vars(Inliner)``).
#
# - target: an anonymous target ("_") or an optionally backquoted name,
#   ended by a colon that is followed by whitespace or end of string.
# - reference: the whole string is a simple reference name followed by
#   "_", or a backquoted phrase followed by "`_".
# - substitution: substitution text closed by "|", followed by
#   whitespace or end of string.
explicit.patterns = Struct(
      target=re.compile(r"""
(
_ # anonymous target
| # *OR*
(?!_) # no underscore at the beginning
(?P<quote>`?) # optional open quote
(?![ `]) # first char. not space or
# backquote
(?P<name> # reference name
.+?
)
%(non_whitespace_escape_before)s
(?P=quote) # close quote if open quote used
)
(?<!(?<!\x00):) # no unescaped colon at end
%(non_whitespace_escape_before)s
[ ]? # optional space
: # end of reference name
([ ]+|$) # followed by whitespace
""" % vars(Inliner), re.VERBOSE),
      reference=re.compile(r"""
(
(?P<simple>%(simplename)s)_
| # *OR*
` # open backquote
(?![ ]) # not space
(?P<phrase>.+?) # hyperlink phrase
%(non_whitespace_escape_before)s
`_ # close backquote,
# reference mark
)
$ # end of string
""" % vars(Inliner), re.VERBOSE | re.UNICODE),
      substitution=re.compile(r"""
(
(?![ ]) # first char. not space
(?P<name>.+?) # substitution text
%(non_whitespace_escape_before)s
\| # close delimiter
)
([ ]+|$) # followed by whitespace
""" % vars(Inliner), re.VERBOSE),)
| |
def footnote(self, match):
    """
    Parse a footnote; return ``([footnote_node], blank_finish)``.

    Group 1 of `match` is the label.  A normalized label starting with
    "#" makes an auto-numbered footnote (any text after the "#" becomes
    its name), "*" makes an auto-symbol footnote, and anything else is a
    manually numbered footnote that gets an explicit label child.
    """
    source, source_line = self.state_machine.get_source_and_line()
    (body_lines, indent, body_offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    raw_label = match.group(1)
    refname = normalize_name(raw_label)
    node = nodes.footnote('\n'.join(body_lines))
    node.source = source
    node.line = source_line
    if refname[0] == '#':
        # Auto-numbered; whatever follows the "#" is the autonumber label.
        refname = refname[1:]
        node['auto'] = 1
        if refname:
            node['names'].append(refname)
        self.document.note_autofootnote(node)
    elif refname == '*':
        # Auto-symbol footnotes carry no name.
        refname = ''
        node['auto'] = '*'
        self.document.note_symbol_footnote(node)
    else:
        # Manually numbered.
        node += nodes.label('', raw_label)
        node['names'].append(refname)
        self.document.note_footnote(node)
    # Named footnotes are registered as explicit targets; unnamed ones
    # only receive an id.
    if refname:
        self.document.note_explicit_target(node, node)
    else:
        self.document.set_id(node, node)
    if body_lines:
        self.nested_parse(body_lines, input_offset=body_offset, node=node)
    return [node], blank_finish
| |
def citation(self, match):
    """
    Parse a citation; return ``([citation_node], blank_finish)``.

    Group 1 of `match` is the citation label; its normalized form becomes
    the node's name.  The node is registered with the document both as a
    citation and as an explicit target, and any indented body text is
    parsed into it.
    """
    source, source_line = self.state_machine.get_source_and_line()
    (body_lines, indent, body_offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    raw_label = match.group(1)
    node = nodes.citation('\n'.join(body_lines))
    node.source = source
    node.line = source_line
    node += nodes.label('', raw_label)
    node['names'].append(normalize_name(raw_label))
    self.document.note_citation(node)
    self.document.note_explicit_target(node, node)
    if body_lines:
        self.nested_parse(body_lines, input_offset=body_offset, node=node)
    return [node], blank_finish
| |
def hyperlink_target(self, match):
    """
    Parse an explicit hyperlink target; return ``([target], blank_finish)``.

    `match` is the match of the explicit markup start on the current
    line.  The target name may span several lines of the block: lines are
    appended to the candidate text until `explicit.patterns.target`
    matches.  Raises `MarkupError` if the block is exhausted first.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    src, srcline = self.state_machine.get_source_and_line()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(
          match.end(), until_blank=1, strip_indent=0)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    # Work on null-escaped copies of the lines from here on.
    block = [escape2null(line) for line in block]
    escaped = block[0]
    blockindex = 0
    while 1:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        # No match yet: extend the candidate with the next line.
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.')
    # Drop the lines that lie wholly inside the matched name; block[0] is
    # now the line on which the match ended.
    del block[:blockindex]
    # `escaped` ends with block[0], so the (negative) index below counts
    # back from the end of that line to the end of the match, keeping only
    # the text that follows the "name:" part.
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
| |
def make_target(self, block, block_text, lineno, target_name):
    """
    Build the node for a hyperlink target.

    The block is classified by `parse_target()`.  A "refname" result
    yields an indirect target, registered under `target_name` and noted
    with the document; a "refuri" result yields a target registered with
    its URI.  For any other result the second item returned by
    `parse_target()` is passed back unchanged.
    """
    kind, payload = self.parse_target(block, block_text, lineno)
    if kind == 'refname':
        node = nodes.target(block_text, '', refname=normalize_name(payload))
        node.indirect_reference_name = payload
        self.add_target(target_name, '', node, lineno)
        self.document.note_indirect_target(node)
        return node
    if kind == 'refuri':
        node = nodes.target(block_text, '')
        self.add_target(target_name, payload, node, lineno)
        return node
    return payload
| |
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name, when the last block
          line ends with "_" and the space-joined block is recognized by
          `is_reference()`;
        - 'refuri' and the URI (the block with all whitespace removed,
          then unescaped) in every other case.
    """
    looks_indirect = bool(block) and block[-1].rstrip().endswith('_')
    if looks_indirect:
        joined = ' '.join([line.strip() for line in block])
        refname = self.is_reference(joined)
        if refname:
            return 'refname', refname
    # Not an indirect target: squeeze out all whitespace to form the URI.
    pieces = [''.join(line.split()) for line in block]
    return 'refuri', unescape(''.join(pieces))
| |
def is_reference(self, reference):
    """
    Return the unescaped reference name if `reference` is a hyperlink
    reference, else None.

    The whitespace-normalized text is matched against
    `explicit.patterns.reference`; the name is taken from the "simple"
    group, or from the "phrase" group if "simple" is empty.
    """
    normalized = whitespace_normalize_name(reference)
    found = self.explicit.patterns.reference.match(normalized)
    if found:
        return unescape(found.group('simple') or found.group('phrase'))
    return None
| |
def add_target(self, targetname, refuri, target, lineno):
    """
    Fill in `target` and register it with the document.

    With an empty `targetname` the target is anonymous: `refuri` (if any)
    is stored as given and the node is noted as an anonymous target.
    Otherwise the normalized, unescaped name is added, `refuri` (if any)
    is passed through ``self.inliner.adjust_uri()``, and the node is
    noted as an explicit target.  Raises `ApplicationError` when
    `adjust_uri()` returns a false value.
    """
    target.line = lineno
    if not targetname:
        # Anonymous target.
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    target['names'].append(normalize_name(unescape(targetname)))
    if refuri:
        adjusted = self.inliner.adjust_uri(refuri)
        if not adjusted:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = adjusted
    self.document.note_explicit_target(target, self.parent)
| |
def substitution_def(self, match):
    """
    Parse a substitution definition.

    Returns a 2-tuple: a one-element list holding either the
    substitution_definition node or a system message (missing contents,
    illegal element, or empty/invalid definition), and the "blank finish"
    flag.  Raises `MarkupError` if no prefix of the block matches
    `explicit.patterns.substitution`.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      strip_indent=0)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # The substitution name may span lines: keep appending (space-joined,
    # stripped, null-escaped) lines until the pattern matches.
    while 1:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
    del block[:blockindex]          # strip out the substitution marker
    # Keep only what follows the matched "name|" on the line where the
    # match ended; the index counts back from the end of that line.
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        # Nothing left on that line: the contents start on the next one.
        del block[0]
        offset += 1
    # Drop trailing blank lines.
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
          block, input_offset=offset, node=substitution_node,
          initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move every child that is neither Inline nor Text out of the
    # definition and into the parent.  The loop runs over a copy while
    # `i` tracks the position in the live child list.
    i = 0
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline) or
                isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    # Reject the definition on the first disallowed element found.
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element:',
                pformat, nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
              'Substitution definition "%s" empty or invalid.' % subname,
              nodes.literal_block(blocktext, blocktext),
              source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
| |
def disallowed_inside_substitution_definitions(self, node):
    """
    Return 1 if `node` may not appear inside a substitution definition,
    else 0.

    Disallowed are nodes that carry ids, anonymous references, and
    auto-numbered/auto-symbol footnote references.
    """
    if node['ids']:
        return 1
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 1
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 1
    return 0
| |
def directive(self, match, **option_presets):
    """
    Look up and run the directive named by group 1 of `match`.

    Messages produced by the lookup are added to the parent.  A directive
    that is found is run via `run_directive()`, with `option_presets` as
    its default options; otherwise `unknown_directive()` is used.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)
| |
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.  A plain function or method is
      first wrapped with `convert_directive_function()`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.  A
    `MarkupError` from parsing the directive block, or a `DirectiveError`
    from running the directive, is turned into a system message that
    becomes the returned node list.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    # Position information is read before the indented block is fetched.
    lineno = self.state_machine.abs_line_number()
    src, srcline = self.state_machine.get_source_and_line()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
              = self.state_machine.get_first_known_indented(match.end(),
                                                            strip_top=0)
    # The directive's full source text: every input line from where it
    # started up to the state machine's current line.
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError, detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text),
            source=src, line=srcline)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError, error:
        msg_node = self.reporter.system_message(error.level, error.msg,
                                                source=src, line=srcline)
        msg_node += nodes.literal_block(block_text, block_text)
        result = [msg_node]
    # Sanity-check the directive's return value.
    assert isinstance(result, list), \
           'Directive "%s" must return a list of nodes.' % type_name
    for i in range(len(result)):
        assert isinstance(result[i], nodes.Node), \
               ('Directive "%s" returned non-Node object (index %s): %r'
                % (type_name, i, result[i]))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
| |
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split a directive's indented block into arguments, options, content.

    One leading blank line and all trailing blank lines are trimmed from
    `indented`.  If the directive takes arguments or options, the lines
    up to the first blank line form the argument/option block and the
    rest is content; otherwise the whole block is content.

    Returns a 4-tuple ``(arguments, options, content, content_offset)``.
    Raises `MarkupError` when arguments or content are present but the
    directive does not permit them.
    """
    option_spec = directive.option_spec
    takes_arguments = (directive.required_arguments
                       or directive.optional_arguments)
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (takes_arguments or option_spec):
        # Index of the first blank line (or the block length if none).
        blank_at = 0
        while blank_at < len(indented) and indented[blank_at].strip():
            blank_at += 1
        arg_block = indented[:blank_at]
        content = indented[blank_at+1:]
        content_offset = line_offset + blank_at + 1
    else:
        arg_block = []
        content = indented
        content_offset = line_offset
    # Skip blank lines at the top of the content.
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    options = {}
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
        if arg_block and not takes_arguments:
            raise MarkupError('no arguments permitted; blank line '
                              'required before content block')
    arguments = []
    if takes_arguments:
        arguments = self.parse_directive_arguments(directive, arg_block)
    if content and not directive.has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
| |
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Separate and parse the options found in `arg_block`.

    The option block starts at the first line beginning with ":" and
    runs to the end of `arg_block`; the lines before it stay arguments.
    Parsed options are laid over a copy of `option_presets`.

    Returns ``(options, arg_block)``.  Raises `MarkupError` carrying the
    error string if `parse_extension_options()` reports failure.
    """
    options = option_presets.copy()
    first_option = None
    for index in range(len(arg_block)):
        if arg_block[index].startswith(':'):
            first_option = index
            break
    if first_option is not None:
        opt_block = arg_block[first_option:]
        arg_block = arg_block[:first_option]
        success, data = self.parse_extension_options(option_spec,
                                                     opt_block)
        if not success:
            # `data` is an error string.
            raise MarkupError(data)
        # `data` is a dict of options.
        options.update(data)
    return options, arg_block
| |
def parse_directive_arguments(self, directive, arg_block):
    """
    Split the argument block into a list of directive arguments.

    The lines of `arg_block` are joined and split on whitespace.  Raises
    `MarkupError` if fewer than ``directive.required_arguments`` result.
    If more than required + optional result, the text is re-split so the
    final argument keeps its internal whitespace when
    ``directive.final_argument_whitespace`` is set; otherwise
    `MarkupError` is raised.
    """
    minimum = directive.required_arguments
    maximum = minimum + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < minimum:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (minimum, supplied))
    if supplied > maximum:
        if not directive.final_argument_whitespace:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (maximum, supplied))
        arguments = arg_text.split(None, maximum - 1)
    return arguments
| |
| def parse_extension_options(self, option_spec, datalines): |
| """ |
| Parse `datalines` for a field list containing extension options |
| matching `option_spec`. |
| |
| :Parameters: |
| - `option_spec`: a mapping of option name to conversion |
| function, which should raise an exception on bad input. |
| - `datalines`: a list of input strings. |
| |
| :Return: |
| - Success value, 1 or 0. |
| - An option dictionary on success, an error string on failure. |
| """ |
| node = nodes.field_list() |
| newline_offset, blank_finish = self.nested_list_parse( |
| datalines, 0, node, initial_state='ExtensionOptions', |
| blank_finish=1) |
| if newline_offset != len(datalines): # incomplete parse of block |
| return 0, 'invalid option block' |
| try: |
| options = utils.extract_extension_options(node, option_spec) |
| except KeyError, detail: |
| return 0, ('unknown option: "%s"' % detail.args[0]) |
| except (ValueError, TypeError), detail: |
| return 0, ('invalid option value: %s' % ' '.join(detail.args)) |
| except utils.ExtensionOptionError, detail: |
| return 0, ('invalid option data: %s' % ' '.join(detail.args)) |
| if blank_finish: |
| return 1, options |
| else: |
| return 0, 'option data incompletely parsed' |
| |
def unknown_directive(self, type_name):
    """
    Report a directive whose type is not known.

    The directive's indented block is fetched (indentation kept) and
    attached as a literal block to an error system message.

    Returns ``([error], blank_finish)``.
    """
    source, source_line = self.state_machine.get_source_and_line()
    (lines, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
        0, strip_indent=0)
    raw = '\n'.join(lines)
    message = 'Unknown directive type "%s".' % type_name
    error = self.reporter.error(message, nodes.literal_block(raw, raw),
                                source=source, line=source_line)
    return [error], blank_finish
| |
def comment(self, match):
    """
    Parse a comment; return ``([comment_node], blank_finish)``.

    If nothing but whitespace follows the match on its line and the next
    line is blank, an empty comment is returned with a true blank-finish
    flag.  Otherwise the indented block, minus trailing blank lines,
    becomes the comment text.
    """
    remainder = match.string[match.end():]
    if not remainder.strip() and self.state_machine.is_next_line_blank():
        # An empty comment: "A tiny but practical wart."
        return [nodes.comment()], 1
    (lines, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    while lines and not lines[-1].strip():
        lines.trim_end()
    body = '\n'.join(lines)
    return [nodes.comment(body, body)], blank_finish
| |
# Table of (method, pattern) pairs for the explicit markup constructs.
# `explicit_construct()` tries the patterns in this order and calls the
# first matching method as ``method(self, match)``.  If none matches, the
# text is parsed as a comment.
#
# Every pattern begins with the explicit markup start (".." followed by
# spaces).  The footnote, citation and directive patterns capture their
# label or name in group 1.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-number ed?) footnote label
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
| |
def explicit_markup(self, match, context, next_state):
    """
    Transition for explicit markup: footnotes, hyperlink targets,
    directives, comments.

    The construct is parsed and added to the parent, then any explicit
    constructs that follow are handled by `explicit_list()`.
    """
    parsed_nodes, ends_blank = self.explicit_construct(match)
    self.parent += parsed_nodes
    self.explicit_list(ends_blank)
    return [], next_state, []
| |
def explicit_construct(self, match):
    """
    Determine which explicit construct this is, parse & return it.

    The (method, pattern) pairs of `explicit.constructs` are tried in
    order against the whole line; the first method whose pattern matches
    parses the construct.  If nothing matches, or the method raises
    `MarkupError`, the text is parsed as a comment instead (in the latter
    case a warning carrying the error text is appended to the node list).

    Returns a 2-tuple: list of nodes, and a "blank finish" flag.
    """
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if expmatch:
            try:
                return method(self, expmatch)
            # Reachable: `hyperlink_target` and `substitution_def` raise
            # MarkupError for malformed input.
            except MarkupError, error: # never reached?
                message = ' '.join(error.args)
                src, srcline = self.state_machine.get_source_and_line()
                errors.append(self.reporter.warning(
                    message, source=src, line=srcline))
                # Stop trying other constructs; fall back to a comment.
                break
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish
| |
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).

    Parsing starts on the line after the current one, in the "Explicit"
    state, and adds nodes to the current parent.  Afterwards the state
    machine is moved to where the nested parse ended, and an unindent
    warning is added if the series did not finish with a blank line.
    """
    machine = self.state_machine
    first = machine.line_offset + 1       # next line
    remaining = machine.input_lines[first:]
    end_offset, blank_finish = self.nested_list_parse(
        remaining,
        input_offset=machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=machine.match_titles)
    self.goto_line(end_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Explicit markup')
| |
def anonymous(self, match, context, next_state):
    """
    Transition for anonymous hyperlink targets.

    The target is parsed and added to the parent, then any explicit
    constructs that follow are handled by `explicit_list()`.
    """
    parsed_nodes, ends_blank = self.anonymous_target(match)
    self.parent += parsed_nodes
    self.explicit_list(ends_blank)
    return [], next_state, []
| |
def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target.

    The indented block up to the first blank line is null-escaped line
    by line and handed to `make_target()` with an empty target name.

    Returns ``([target], blank_finish)``.
    """
    lineno = self.state_machine.abs_line_number()
    (raw_lines, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
        match.end(), until_blank=1)
    blocktext = match.string[:match.end()] + '\n'.join(raw_lines)
    escaped_lines = [escape2null(raw) for raw in raw_lines]
    target = self.make_target(escaped_lines, blocktext, lineno, '')
    return [target], blank_finish
| |
def line(self, match, context, next_state):
    """
    Section title overline or transition marker.

    Where titles are being matched, the line becomes the context for the
    "Line" state.  Otherwise "::" and lines shorter than four characters
    are re-processed as ordinary text (the latter with an info message);
    anything else gets a severe system message.
    """
    machine = self.state_machine
    if machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        self.parent += self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=machine.abs_line_number())
        raise statemachine.TransitionCorrection('text')
    blocktext = machine.line
    self.parent += self.reporter.severe(
        'Unexpected section title or transition.',
        nodes.literal_block(blocktext, blocktext),
        line=machine.abs_line_number())
    return [], next_state, []
| |
def text(self, match, context, next_state):
    """
    Titles, definition lists, paragraphs.

    The matched line becomes the context and the "Text" state classifies
    the following line.
    """
    first_line = [match.string]
    return first_line, 'Text', []
| |
| |
class RFC2822Body(Body):

    """
    Body state that additionally recognizes RFC2822-style headers.

    RFC2822 headers are only valid as the first constructs in documents.
    Every transition here leads to the plain `Body` state, so `Body`
    takes over as soon as any construct has been handled.
    """

    # Work on a copy so that Body's own pattern table stays untouched.
    patterns = Body.patterns.copy()
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    # The new transition goes just before the last entry ('text').
    initial_transitions.insert(-1, ('rfc2822', 'Body'))

    def rfc2822(self, match, context, next_state):
        """
        RFC2822-style field list item.

        Starts a field list with class "rfc2822", adds the first field,
        and parses the following lines in the "RFC2822List" state.
        """
        container = nodes.field_list(classes=['rfc2822'])
        self.parent += container
        first_field, blank_finish = self.rfc2822_field(match)
        container += first_field
        machine = self.state_machine
        following = machine.line_offset + 1     # next line
        end_offset, blank_finish = self.nested_list_parse(
            machine.input_lines[following:],
            input_offset=machine.abs_line_offset() + 1,
            node=container, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(end_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """
        Build one field node from a matched header line.

        The field name is the text before the first ":"; the indented
        lines up to the first blank line form the field body.

        Returns ``(field_node, blank_finish)``.
        """
        colon_at = match.string.find(':')
        name = match.string[:colon_at]
        (body_lines, indent, body_offset,
         blank_finish) = self.state_machine.get_first_known_indented(
            match.end(), until_blank=1)
        field = nodes.field()
        field += nodes.field_name(name, name)
        body = nodes.field_body('\n'.join(body_lines))
        field += body
        if body_lines:
            self.nested_parse(body_lines, input_offset=body_offset,
                              node=body)
        return field, blank_finish
| |
| |
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.
    Compound elements are lists and list-like constructs.

    Every transition method is disabled here (rebound to
    `invalid_input`); subclasses override individual methods to
    re-enable them.

    Example: once an initial bullet list item is recognized, `Body.bullet`
    calls ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts a nested parsing session with `BulletList` as the initial
    state and a "bullet_list" node as its container.  `BulletList` only
    enables the ``bullet`` transition method, so bullet list items are
    parsed and inserted into the container for as long as they keep
    coming.  The first construct that is *not* a bullet list item
    triggers `invalid_input`, which ends the nested parse and closes the
    container.  Because `BulletList` has to recognize everything *other
    than* bullet list items as invalid, it inherits the transition list
    created in `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # Step back one line so that the parent state machine can
        # reassess the line that did not fit.
        self.state_machine.previous_line()
        raise EOFError

    # Structural transitions.
    indent = invalid_input
    line = invalid_input
    text = invalid_input
    # List-like constructs.
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    # Blocks and tables.
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    # Explicit markup.
    explicit_markup = invalid_input
    anonymous = invalid_input
| |
| |
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """
        Bullet list item.

        An item whose bullet character differs from the list's "bullet"
        attribute belongs to a new list and is rejected as invalid input.
        """
        same_bullet = match.string[0] == self.parent['bullet']
        if not same_bullet:
            # Different bullet: new list.
            self.invalid_input()
        item, ends_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ends_blank
        return [], next_state, []
| |
| |
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """
        Definition lists.

        The matched line becomes the context and the "Definition" state
        examines the following line.
        """
        term_line = [match.string]
        return term_line, 'Definition', []
| |
| |
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """
        Enumerated list item.

        The item continues the current list only if its format matches,
        its enumerator is either "#" or the next ordinal of the list's
        own sequence type (with no "#" item seen so far), and
        `is_enumerated_list_item()` accepts it.  Anything else starts a
        new list and is rejected as invalid input.
        """
        fmt, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # The checks are evaluated strictly in this order, each one only
        # if the earlier ones found no mismatch.
        mismatch = fmt != self.format
        if not mismatch and sequence != '#':
            mismatch = (sequence != self.parent['enumtype']
                        or self.auto
                        or ordinal != (self.lastordinal + 1))
        if not mismatch:
            mismatch = not self.is_enumerated_list_item(ordinal, sequence,
                                                        fmt)
        if mismatch:
            # Different enumeration: new list.
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        item, ends_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ends_blank
        self.lastordinal = ordinal
        return [], next_state, []
| |
| |
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field: parse it and add it to the field list."""
        parsed_field, ends_blank = self.field(match)
        self.parent += parsed_field
        self.blank_finish = ends_blank
        return [], next_state, []
| |
| |
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """
        Option list item.

        If the item cannot be parsed (`MarkupError`), the line is
        rejected as invalid input.
        """
        try:
            item, ends_blank = self.option_list_item(match)
        except MarkupError:
            self.invalid_input()
        self.parent += item
        self.blank_finish = ends_blank
        return [], next_state, []
| |
| |
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse the pattern and transition tables of RFC2822Body.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """
        RFC2822-style field list item.

        The field is added to the list and the machine stays in the
        "RFC2822List" state.
        """
        parsed_field, ends_blank = self.rfc2822_field(match)
        self.parent += parsed_field
        self.blank_finish = ends_blank
        return [], 'RFC2822List', []

    # A blank line is invalid input here, too.
    blank = SpecializedBody.invalid_input
| |
| |
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """
        Override `Body.parse_field_body` for simpler parsing.

        Each run of consecutive non-blank lines is added to `node` as one
        paragraph holding the raw, newline-joined text.
        """
        pending = []
        for line in list(indented):
            if line.strip():
                pending.append(line)
                continue
            if pending:
                # A blank line closes the paragraph collected so far.
                text = '\n'.join(pending)
                node += nodes.paragraph(text, text)
                pending = []
        if pending:
            # Close the final paragraph at the end of the body.
            text = '\n'.join(pending)
            node += nodes.paragraph(text, text)
| |
| |
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line is invalid input in this state.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """
        New line of line block.

        The line is added to the block; messages produced while parsing
        it are added to the block's parent.
        """
        current = self.state_machine.abs_line_number()
        new_line, messages, ends_blank = self.line_block_line(match, current)
        self.parent += new_line
        self.parent.parent += messages
        self.blank_finish = ends_blank
        return [], next_state, []
| |
| |
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """
        Footnotes, hyperlink targets, directives, comments.

        The parsed nodes are added to the parent and the blank-finish
        flag is recorded on the state.
        """
        parsed_nodes, ends_blank = self.explicit_construct(match)
        self.parent += parsed_nodes
        self.blank_finish = ends_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """
        Anonymous hyperlink targets.

        The parsed nodes are added to the parent and the blank-finish
        flag is recorded on the state.
        """
        parsed_nodes, ends_blank = self.anonymous_target(match)
        self.parent += parsed_nodes
        self.blank_finish = ends_blank
        return [], next_state, []

    # A blank line is invalid input in this state.
    blank = SpecializedBody.invalid_input
| |
| |
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.

    Both transitions end the nested parse by raising `EOFError` once
    they have handled their input.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename, re.UNICODE),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Run a directive embedded in the substitution definition.

        The definition's first name is handed to the directive as its
        preset "alt" option, and the resulting nodes are added to the
        parent.
        """
        alt_text = self.parent['names'][0]
        produced, ends_blank = self.directive(match, alt=alt_text)
        self.parent += produced
        if not self.state_machine.at_eof():
            self.blank_finish = ends_blank
        raise EOFError

    def text(self, match, context, next_state):
        """
        Plain text: record whether the next line is blank (unless at the
        end of the input) and stop.
        """
        machine = self.state_machine
        if not machine.at_eof():
            self.blank_finish = machine.is_next_line_blank()
        raise EOFError
| |
| |
| class Text(RSTState): |
| |
| """ |
| Classifier of second line of a text block. |
| |
| Could be a paragraph, a definition list item, or a title. |
| """ |
| |
| patterns = {'underline': Body.patterns['line'], |
| 'text': r''} |
| initial_transitions = [('underline', 'Body'), ('text', 'Body')] |
| |
def blank(self, match, context, next_state):
    """
    End of paragraph.

    The context lines are turned into a paragraph starting on the
    previous line; if that calls for a literal block, the literal block
    is parsed and added as well.  Parsing continues in "Body".
    """
    start = self.state_machine.abs_line_number() - 1
    para_nodes, wants_literal = self.paragraph(context, start)
    self.parent += para_nodes
    if wants_literal:
        self.parent += self.literal_block()
    return [], 'Body', []
| |
def eof(self, context):
    """
    End of input: flush any pending context lines as a paragraph (via
    `blank()`) and return an empty list.
    """
    if not context:
        return []
    self.blank(None, context, None)
    return []
| |
def indent(self, match, context, next_state):
    """
    Definition list item.

    Starts a definition list whose first item is built from the context
    lines, then parses the following lines in the "DefinitionList"
    state.  An unindent warning is added if the list does not finish
    with a blank line.  Parsing continues in "Body".
    """
    container = nodes.definition_list()
    first_item, blank_finish = self.definition_list_item(context)
    container += first_item
    self.parent += container
    machine = self.state_machine
    following = machine.line_offset + 1     # next line
    end_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=container, initial_state='DefinitionList',
        blank_finish=blank_finish, blank_finish_state='Definition')
    self.goto_line(end_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Definition list')
    return [], 'Body', []
| |
def underline(self, match, context, next_state):
    """
    Section title.

    `context[0]` is the title line and `match.string` the underline.  An
    underline narrower than the title is either re-processed as ordinary
    text (when shorter than four characters) or reported with a warning.
    Where titles are not being matched, a severe message is added
    instead of a section.
    """
    machine = self.state_machine
    lineno = machine.abs_line_number()
    src, srcline = machine.get_source_and_line()
    title = context[0].rstrip()
    underline = match.string.rstrip()
    messages = []
    if column_width(title) > len(underline):
        if len(underline) >= 4:
            blocktext = context[0] + '\n' + machine.line
            messages.append(self.reporter.warning(
                'Title underline too short.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline))
        else:
            if machine.match_titles:
                self.parent += self.reporter.info(
                    'Possible title underline, too short for the title.\n'
                    "Treating it as ordinary text because it's so short.",
                    source=src, line=srcline)
            raise statemachine.TransitionCorrection('text')
    if not machine.match_titles:
        blocktext = context[0] + '\n' + machine.line
        severe = self.reporter.severe(
            'Unexpected section title.',
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        self.parent += messages
        self.parent += severe
        return [], next_state, []
    source = title + '\n' + underline
    style = underline[0]
    # The title line has been consumed; empty the shared context in place.
    context[:] = []
    self.section(title, source, style, lineno - 1, messages)
    return [], next_state, []
| |
| def text(self, match, context, next_state): |
| """Paragraph.""" |
| startline = self.state_machine.abs_line_number() - 1 |
| msg = None |
| try: |
| block = self.state_machine.get_text_block(flush_left=1) |
| except statemachine.UnexpectedIndentationError, instance: |
| block, src, srcline = instance.args |
| msg = self.reporter.error('Unexpected indentation.', |
| source=src, line=srcline) |
| lines = context + list(block) |
| paragraph, literalnext = self.paragraph(lines, startline) |
| self.parent += paragraph |
| self.parent += msg |
| if literalnext: |
| try: |
| self.state_machine.next_line() |
| except EOFError: |
| pass |
| self.parent += self.literal_block() |
| return [], next_state, [] |
| |
def literal_block(self):
    """
    Return a list of nodes for the literal block at the current line.

    An indented block yields a literal_block node, followed by an
    unindent warning when it is not terminated by a blank line.  If no
    indented text is found, the result of `quoted_literal_block()` is
    returned instead.
    """
    block, indent, offset, blank_finish = self.state_machine.get_indented()
    # Strip trailing blank lines.
    while block and not block[-1].strip():
        block.trim_end()
    if not block:
        return self.quoted_literal_block()
    text = '\n'.join(block)
    node = nodes.literal_block(text, text)
    node.line = offset + 1
    if blank_finish:
        return [node]
    return [node, self.unindent_warning('Literal block')]
| |
def quoted_literal_block(self):
    """
    Parse a quoted (unindented) literal block from the current line on.

    A nested parse restricted to the `QuotedLiteralBlock` state, with
    title matching switched off, collects its output in a throw-away
    element.  The outer state machine is then moved to where the nested
    parse stopped.  Return the list of nodes produced.
    """
    machine = self.state_machine
    start_abs = machine.abs_line_offset()
    start = machine.line_offset
    container = nodes.Element()
    nested_kwargs = {'state_classes': (QuotedLiteralBlock,),
                     'initial_state': 'QuotedLiteralBlock'}
    stop_abs = self.nested_parse(
        machine.input_lines[start:],
        input_offset=start_abs,
        node=container,
        match_titles=0,
        state_machine_kwargs=nested_kwargs)
    self.goto_line(stop_abs)
    return container.children
| |
def definition_list_item(self, termline):
    """
    Build a definition_list_item from a term line and the indented
    block that follows it.

    `termline` is a list holding the single term line.  Return a
    ``(definition_list_item, blank_finish)`` tuple, where `blank_finish`
    is the value reported by the state machine's `get_indented()`.
    """
    machine = self.state_machine
    indented, indent, line_offset, blank_finish = machine.get_indented()
    rawsource = '\n'.join(termline + list(indented))
    item = nodes.definition_list_item(rawsource)
    lineno = machine.abs_line_number() - 1
    src, srcline = machine.get_source_and_line()
    item.source = src
    item.line = srcline - 1
    termlist, messages = self.term(termline, lineno)
    item += termlist
    definition = nodes.definition('', *messages)
    item += definition
    if termline[0].endswith('::'):
        # Looks like a paragraph announcing a literal block; leave a hint.
        hint = self.reporter.info(
            'Blank line missing before literal block (after the "::")? '
            'Interpreted as a definition list item.',
            source=src, line=srcline)
        definition += hint
    self.nested_parse(indented, input_offset=line_offset, node=definition)
    return item, blank_finish
| |
# Delimiter between a definition-list term and its classifiers: a colon
# with one or more spaces on both sides.  Used by `term()` to split the
# raw text of the term line.
classifier_delimiter = re.compile(' +: +')
| |
def term(self, lines, lineno):
    """
    Return a definition_list's term and optional classifiers.

    `lines` must contain exactly one line.  The result is a
    ``(node_list, messages)`` tuple: `node_list` starts with the term
    node, followed by one classifier node per `classifier_delimiter`
    found in the plain-text parts of the line; `messages` are those
    returned by inline parsing.
    """
    assert len(lines) == 1
    text_nodes, messages = self.inline_text(lines[0], lineno)
    node_list = [nodes.term()]
    for node in text_nodes:
        if not isinstance(node, nodes.Text):
            # Inline markup always stays with the current term/classifier.
            node_list[-1] += node
            continue
        parts = self.classifier_delimiter.split(node.rawsource)
        if len(parts) == 1:
            # No delimiter in this text: keep the node as it is.
            node_list[-1] += node
            continue
        node_list[-1] += nodes.Text(parts[0].rstrip())
        for classifier_text in parts[1:]:
            node_list.append(nodes.classifier('', classifier_text))
    return node_list, messages
| |
| |
class SpecializedText(Text):

    """
    Base class for the second and subsequent lines of Text-variants.

    Every transition method defined here is disabled: it raises
    `EOFError`.  Subclasses override the individual methods they want
    to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member.  Abort this state machine."""
        raise EOFError()

    # All transitions are invalid unless a subclass says otherwise.
    text = underline = indent = blank = invalid_input
| |
| |
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition: back up two lines so the parent can reassess."""
        machine = self.state_machine
        machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """Definition list item; continue in the 'DefinitionList' state."""
        item, finished_blank = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = finished_blank
        return [], 'DefinitionList', []
| |
| |
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.

    `context[0]` holds the first line (the candidate overline or
    transition marker).  Candidates shorter than 4 characters are handed
    back to the 'Body' state as ordinary text via `state_correction()`.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            # Consume the flag; no short-marker check in this case.
            self.memo.section_bubble_up_kludge = 0
        elif len(marker) < 4:
            # Too short for a transition: re-parse as text (always raises).
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        # Re-arm the check regardless of the value it had on entry.
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition: re-parse as text (always raises).
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        # The marker is on the line before the current (blank) one.
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """
        Potential over- & underlined title.

        `context[0]` is the overline and the matched line is the title;
        the following line is read as the underline.  Malformed titles
        (no underline at EOF, a non-underline third line, or an
        over/underline mismatch) are reported as severe errors, unless
        the overline is shorter than 4 characters, in which case
        `short_overline()` restarts parsing of the lines as text.  An
        overline narrower than the title only produces a warning that is
        passed on to `section()`.
        """
        lineno = self.state_machine.abs_line_number() - 1
        src, srcline = self.state_machine.get_source_and_line()
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Input ends right after the title line.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                # Always raises (see short_overline/state_correction).
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        # `source` keeps the three lines as read, before any stripping.
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # The third line does not match the 'underline' pattern.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Over- and underline must be identical after right-stripping.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                # Not fatal: the warning travels with the section.
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                messages.append(msg)
        style = (overline[0], underline[0])
        # eofcheck is cleared only for the duration of the section() call.
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """
        Overline followed directly by another underline-like line.

        Reported as an error, unless the overline is shorter than 4
        characters; then `short_overline()` restarts parsing as text.
        """
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        src, srcline = self.state_machine.get_source_and_line()
        if len(overline.rstrip()) < 4:
            # Always raises, so the error below is not reached.
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
              'Invalid section title or transition marker.',
              nodes.literal_block(blocktext, blocktext),
              source=src, line=srcline-1)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """
        Add an info message about a too-short overline to the parent and
        restart parsing as text, `lines` lines back.

        Does not return: `state_correction()` always raises.
        `blocktext` is accepted but not used here.
        """
        src, srcline = self.state_machine.get_source_and_line(lineno)
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            source=src, line=srcline)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Back up `lines` lines, empty `context` in place and raise
        `statemachine.StateCorrection('Body', 'text')`.
        """
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
| |
| |
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.

    The collected lines travel in `context`; system messages are held
    back in `self.messages` and added to the parent at end of input.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=0):
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """Blank line: skipped before the block starts, ends it afterwards."""
        if not context:
            return context, next_state, []
        raise EOFError

    def eof(self, context):
        """Emit the collected literal block, or a warning if there is none."""
        if not context:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        else:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            content = '\n'.join(context)
            block_node = nodes.literal_block(content, content)
            block_node.source = src
            block_node.line = srcline
            self.parent += block_node
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Indented line inside the block: record an error and stop."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        error = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(error)
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        same_quote = re.compile(re.escape(quote_char))
        # New transition matches consistent quotes only:
        transition = (same_quote, self.quoted, self.__class__.__name__)
        self.add_transition('quoted', transition)
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """Line without the expected quote: stop, with an error if started."""
        if context:
            src, srcline = self.state_machine.get_source_and_line()
            error = self.reporter.error(
                'Inconsistent literal block quoting.',
                source=src, line=srcline)
            self.messages.append(error)
            self.state_machine.previous_line()
        raise EOFError
| |
| |
# Module-level tuple of the State classes given to `RSTStateMachine`.
# `QuotedLiteralBlock` is not listed here; `quoted_literal_block()` passes
# it to a nested parse on its own.
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""