| # |
| # epytext.py: epydoc formatted docstring parsing |
| # Edward Loper |
| # |
| # Created [04/10/01 12:00 AM] |
| # $Id: epytext.py 1652 2007-09-26 04:45:34Z edloper $ |
| # |
| |
| """ |
| Parser for epytext strings. Epytext is a lightweight markup whose |
| primary intended application is Python documentation strings. This |
| parser converts Epytext strings to a simple DOM-like representation |
| (encoded as a tree of L{Element} objects and strings). Epytext |
| strings can contain the following X{structural blocks}: |
| |
| - X{epytext}: The top-level element of the DOM tree. |
| - X{para}: A paragraph of text. Paragraphs contain no newlines, |
| and all spaces are soft. |
| - X{section}: A section or subsection. |
| - X{field}: A tagged field. These fields provide information |
| about specific aspects of a Python object, such as the |
| description of a function's parameter, or the author of a |
| module. |
| - X{literalblock}: A block of literal text. This text should be |
| displayed as it would be displayed in plaintext. The |
| parser removes the appropriate amount of leading whitespace |
| from each line in the literal block. |
| - X{doctestblock}: A block containing sample python code, |
| formatted according to the specifications of the C{doctest} |
| module. |
| - X{ulist}: An unordered list. |
| - X{olist}: An ordered list. |
| - X{li}: A list item. This tag is used both for unordered list |
| items and for ordered list items. |
| |
| Additionally, the following X{inline regions} may be used within |
| C{para} blocks: |
| |
| - X{code}: Source code and identifiers. |
| - X{math}: Mathematical expressions. |
| - X{index}: A term which should be included in an index, if one |
| is generated. |
| - X{italic}: Italicized text. |
| - X{bold}: Bold-faced text. |
| - X{uri}: A Universal Resource Indicator (URI) or Universal |
| Resource Locator (URL) |
| - X{link}: A Python identifier which should be hyperlinked to |
| the named object's documentation, when possible. |
| |
The returned DOM tree will conform to the following Document Type
Description::
| |
| <!ENTITY % colorized '(code | math | index | italic | |
| bold | uri | link | symbol)*'> |
| |
| <!ELEMENT epytext ((para | literalblock | doctestblock | |
| section | ulist | olist)*, fieldlist?)> |
| |
| <!ELEMENT para (#PCDATA | %colorized;)*> |
| |
    <!ELEMENT section (para | literalblock | doctestblock |
                       section | ulist | olist)+>
| |
| <!ELEMENT fieldlist (field+)> |
    <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
                                 ulist | olist)+)>
| <!ELEMENT tag (#PCDATA)> |
| <!ELEMENT arg (#PCDATA)> |
| |
| <!ELEMENT literalblock (#PCDATA | %colorized;)*> |
| <!ELEMENT doctestblock (#PCDATA)> |
| |
| <!ELEMENT ulist (li+)> |
| <!ELEMENT olist (li+)> |
| <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> |
| <!ATTLIST li bullet NMTOKEN #IMPLIED> |
| <!ATTLIST olist start NMTOKEN #IMPLIED> |
| |
| <!ELEMENT uri (name, target)> |
| <!ELEMENT link (name, target)> |
| <!ELEMENT name (#PCDATA | %colorized;)*> |
| <!ELEMENT target (#PCDATA)> |
| |
| <!ELEMENT code (#PCDATA | %colorized;)*> |
| <!ELEMENT math (#PCDATA | %colorized;)*> |
| <!ELEMENT italic (#PCDATA | %colorized;)*> |
| <!ELEMENT bold (#PCDATA | %colorized;)*> |
| <!ELEMENT indexed (#PCDATA | %colorized;)> |
| <!ATTLIST code style CDATA #IMPLIED> |
| |
| <!ELEMENT symbol (#PCDATA)> |
| |
| @var SYMBOLS: A list of the of escape symbols that are supported |
| by epydoc. Currently the following symbols are supported: |
| <<<SYMBOLS>>> |
| """ |
| # Note: the symbol list is appended to the docstring automatically, |
| # below. |
| |
# This module's own docstrings are written in epytext (in English).
__docformat__ = 'epytext en'
| |
| # Code organization.. |
| # 1. parse() |
| # 2. tokenize() |
| # 3. colorize() |
| # 4. helpers |
| # 5. testing |
| |
| import re, string, types, sys, os.path |
| from epydoc.markup import * |
| from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex |
| from epydoc.markup.doctest import doctest_to_html, doctest_to_latex |
| |
| ################################################## |
| ## DOM-Like Encoding |
| ################################################## |
| |
class Element:
    """
    A minimal DOM-style node type used to represent parsed epytext
    documents.  A document is encoded as a tree whose internal nodes
    are C{Element} objects and whose leaves are C{string}s.  Every
    node carries a I{tag} naming its type, together with zero or more
    I{attributes}, each mapping a string key to a string value.
    """
    def __init__(self, tag, *children, **attribs):
        self.tag = tag
        """A string tag indicating the type of this element.
        @type: C{string}"""

        self.children = list(children)
        """A list of the children of this element.
        @type: C{list} of (C{string} or C{Element})"""

        self.attribs = attribs
        """A dictionary mapping attribute names to attribute values
        for this element.
        @type: C{dict} from C{string} to C{string}"""

    def __str__(self):
        """
        Return an XML-style rendering of this element and its
        subtree.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attr_text = ''
        for item in self.attribs.items():
            attr_text += ' %s=%r' % item
        body = ''
        for child in self.children:
            body += str(child)
        return '<%s%s>%s</%s>' % (self.tag, attr_text, body, self.tag)

    def __repr__(self):
        # Render as a constructor call: Element(tag, child..., key=val...)
        parts = [str(self.tag)]
        for child in self.children:
            parts.append('%r' % (child,))
        for item in self.attribs.items():
            parts.append('%s=%r' % item)
        return 'Element(%s)' % ', '.join(parts)
| |
| ################################################## |
| ## Constants |
| ################################################## |
| |
# The possible heading underline characters, listed in order of
# heading depth (index 0 is the outermost section level).
_HEADING_CHARS = "=-~"

# Escape codes (used via E{...}).  These should be needed very rarely.
_ESCAPES = {'lb':'{', 'rb': '}'}

# Symbols. These can be generated via S{...} escapes.
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # HTML character entities
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Alternate (long) names
    'infinity', 'integral', 'product',
    '>=', '<=',
    ]
# Convert to a dictionary, for quick lookup (used by the colorizer to
# validate S{...} escapes).
_SYMBOLS = {}
for symbol in SYMBOLS: _SYMBOLS[symbol] = 1

# Add symbols to the docstring: substitute the list of supported
# symbols for the <<<SYMBOLS>>> marker in the module docstring above.
symblist = ' '
symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
                         for symbol in SYMBOLS])
__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
del symbol, symblist

# Tags for colorizing text: maps the single-letter markup prefix
# (e.g. the "C" in C{...}) to the corresponding DOM element tag.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',       # A Python identifier that should be linked to
    'E': 'escape',     # escapes characters or creates symbols
    'S': 'symbol',
    'G': 'graph',
    }

# Which tags can use "link syntax" (e.g., U{Python<www.python.org>})?
_LINK_COLORIZING_TAGS = ['link', 'uri']
| |
| ################################################## |
| ## Structuring (Top Level) |
| ################################################## |
| |
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string. Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored. If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string;
        or C{None} if C{errors} was supplied and a fatal error was
        encountered.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.
    if errors == None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CRLF line endings to LF, and
    # expand tabs to spaces (indentation is significant in epytext).
    str = re.sub('\015\012', '\012', str)
    str = string.expandtabs(str)

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?  (Fields must come last.)
    encountered_field = 0

    # Create an document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block. The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements. An indentation of "None" reflects
    # an unknown indentation. However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to). No 2 consecutive indent_stack values will be
    # ever be "None." Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Uncomment this for debugging:
        #print ('%s: %s\n%s: %s\n' %
        #       (''.join(['%-11s' % (t and t.tag) for t in stack]),
        #        token.tag, ''.join(['%-11s' % i for i in indent_stack]),
        #        token.indent))

        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # If Token has type PARA, colorize and add the new paragraph
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)

        # If Token has type HEADING, add the new section
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)

        # If Token has type LBLOCK, add the new literal block
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type DTBLOCK, add the new doctest block
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type BULLET, add the new list/list item/field
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Check if the DOM element we just added was a field..
        if stack[-1].tag == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            # Anything other than another field after the first field
            # (len(stack) <= 3 means we're back at the top level) is
            # a structuring error.
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs use inline markup (G{...}) but are really block-level
    # elements; so "raise" any graphs we generated. This is a bit of
    # a hack, but the alternative is to define a new markup for
    # block-level elements, which I'd rather not do. (See sourceforge
    # bug #1673017.)
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was an error, then signal it!
    if len([e for e in errors if e.is_fatal()]) > 0:
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
| |
def _raise_graphs(tree, parent):
    """
    Recursively "raise" any C{graph} elements found in C{tree} to the
    block level: whenever a non-block element directly contains a
    graph child, split that element (within C{parent}'s child list)
    into the part before the graph, the graph itself, and the part
    after the graph.
    """
    # Recurse to children.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    # Block-level containers may hold graphs directly; don't split them.
    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # We found a graph: splice it into the parent, replacing
                # `tree` with (left copy, graph, right copy).
                # NOTE(review): on a second graph in the same element,
                # parent.children.index(tree) is looked up after `tree`
                # has already been replaced -- presumably each paragraph
                # holds at most one graph; verify.
                parent_index = parent.children.index(tree)
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                child_index = 0
                parent_index += 2
            else:
                child_index += 1
| |
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack. This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to. The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).
    """
    indent = token.indent
    # An indent of None means "unknown" (e.g. a one-line list item);
    # in that case we can't make any popping decisions here.
    if indent != None:
        while (len(stack) > 2):
            pop = 0

            # Dedent past a block
            if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
            elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1

            # Dedent to a list item, if it is followed by another list
            # item with the same indentation.
            elif (token.tag == 'bullet' and indent==indent_stack[-2] and
                  stack[-1].tag in ('li', 'field')): pop=1

            # End of a list (no more list items available)
            elif (stack[-1].tag in ('ulist', 'olist') and
                  (token.tag != 'bullet' or token.contents[-1] == ':')):
                pop=1

            # Pop the block, if it's complete. Otherwise, we're done.
            if pop == 0: return
            stack.pop()
            indent_stack.pop()
| |
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph, and add it to the DOM tree.

    @param doc: The DOM document (passed through to the colorizer).
    @param para_token: The C{Token.PARA} token to add.
    @param stack: The stack of open DOM elements.
    @param indent_stack: The indentations of the elements in C{stack}.
    @param errors: A list to which any L{StructuringError}s are added.
    """
    # If the parent's indentation is still unknown, this paragraph
    # defines it.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        if para_token.inline:
            para.attribs['inline'] = True
        stack[-1].children.append(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
| |
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """
    Add a new section to the DOM tree, with the given heading.

    @param doc: The DOM document (passed through to the colorizer).
    @param heading_token: The C{Token.HEADING} token for the section.
    @param stack: The stack of open DOM elements.
    @param indent_stack: The indentations of the elements in C{stack}.
    @param errors: A list to which any L{StructuringError}s are added.
    """
    # If the parent's indentation is still unknown, the heading
    # defines it; otherwise the heading must match it.
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors.
    for tok in stack[2:]:
        if tok.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    # A heading's level may be at most one deeper than the current
    # section nesting (stack holds [None, doc, sec0, sec1, ...]).
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of headings so we're at the
    # correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
| |
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag. When necessary, create the associated
    list.

    @param doc: The DOM document (unused here, kept for signature
        consistency with the other C{_add_*} helpers).
    @param bullet_token: The C{Token.BULLET} token for the item.
    @param stack: The stack of open DOM elements.
    @param indent_stack: The indentations of the elements in C{stack}.
    @param errors: A list to which any L{StructuringError}s are added.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tag != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered-list item only continues the current list if its
        # number follows directly from the previous item's number
        # (and all outer numbering components match).
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # Bug fix: this used to be "stack[-1].tag is 'fieldlist'", an
        # identity comparison against a string literal that only worked
        # by CPython's interning accident; use equality instead.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list. This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list. The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            # Record a non-default starting number.  E.g. the bullet
            # "2." splits into components ['2']; the last component is
            # this item's number within its own list.
            # Bug fix: this used to compare the component *list* to the
            # string '1', which was always unequal -- so every ordered
            # list was tagged with a "start" attribute, even those that
            # begin at 1.
            start = bullet_token.contents.split('.')[:-1]
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
| |
| ################################################## |
| ## Tokenization |
| ################################################## |
| |
class Token:
    """
    An intermediate data structure used while assembling the
    structuring DOM tree for a formatted docstring.  There are five
    kinds of C{Token}:

      - Paragraphs
      - Literal blocks
      - Doctest blocks
      - Headings
      - Bullets

    The text of each C{Token} is kept, in normalized form, in the
    C{contents} variable: paragraphs are collapsed to a single line,
    with newlines and indentation replaced by single spaces, while
    literal blocks and doctest blocks have the appropriate amount of
    leading whitespace removed from each line.

    Each C{Token} also records an indentation level in the C{indent}
    variable; the structuring procedure uses these levels to assemble
    hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type. Possible values are C{Token.PARA}
        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING}, and C{Token.BULLET}.

    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins. This
        line number is only used for issuing errors.

    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.

    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in
        number of leading spaces). A value of C{None} indicates an
        unknown indentation; this is used for list items and fields
        that begin with one-line paragraphs.

    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None}, otherwise. Valid heading levels are 0, 1,
        and 2.

    @type inline: C{bool}
    @ivar inline: If True, the element is an inline level element, comparable
        to an HTML C{<span>} tag. Else, it is a block level element, comparable
        to an HTML C{<div>}.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s. This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces). A value of C{None} indicates an
            unknown indentation.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline as a C{<span>}?.
        @type inline: C{bool}
        """
        self.tag, self.startline = tag, startline
        self.contents, self.indent = contents, indent
        self.level, self.inline = level, inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}.
            C{Token}s have formal representations of the form::
                <Token: para at line 12>
        """
        details = '%s at line %s' % (self.tag, self.startline)
        return '<Token: %s>' % details

    def to_dom(self, doc):
        """
        @return: a DOM representation of this C{Token}: an element
            tagged with this token's C{tag}, whose only child is the
            token's C{contents} string.
        @rtype: L{Element}
        """
        return Element(self.tag, self.contents)
| |
# Construct regular expressions for recognizing bullets. These are
# global so they don't have to be reconstructed each time we tokenize
# a docstring.
_ULIST_BULLET = '[-]( +|$)'          # e.g. "- item"
_OLIST_BULLET = '(\d+[.])+( +|$)'    # e.g. "1. item" or "1.2.3. item"
_FIELD_BULLET = '@\w+( [^{}:\n]+)?:' # e.g. "@param x:"
_BULLET_RE = re.compile(_ULIST_BULLET + '|' +
                        _OLIST_BULLET + '|' +
                        _FIELD_BULLET)
_LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
_FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
# The building-block pattern strings are only needed to assemble the
# compiled patterns above.
del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
| |
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}. This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored. If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If the block dedents past block_indent, track the minimum
    # indentation seen; it is used below when stripping the leading
    # indentation off each line of the block.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        cur_indent = len(cur) - len(cur.lstrip())

        # A blank line ends doctest block.
        if cur_indent == len(cur):
            break

        # A dedent past block_indent is an error.
        if cur_indent < block_indent:
            min_indent = min(min_indent, cur_indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        linenum += 1

    # Strip the common leading indentation, build the token, and
    # return the line number just past the block.
    stripped = [ln[min_indent:] for ln in lines[start:linenum]]
    tokens.append(Token(Token.DTBLOCK, start, '\n'.join(stripped),
                        block_indent))
    return linenum
| |
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}. This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing. Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        stripped = cur.lstrip()
        cur_indent = len(cur) - len(stripped)

        # A non-blank line dedented to (or past) block_indent ends the
        # literal block; blank lines are kept.
        if stripped and cur_indent <= block_indent:
            break

        linenum += 1

    # Drop one indentation level (block_indent+1 columns) from every
    # line, trim leading/trailing blank lines, and emit the token.
    body = '\n'.join([ln[block_indent+1:] for ln in lines[start:linenum]])
    body = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', body)
    tokens.append(Token(Token.LBLOCK, start, body, block_indent))
    return linenum
| |
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}. C{bullet_indent} should be the indentation of the
    list item. Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}. This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing. Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    para_indent = None
    # Does the first line end with "::" (introducing a literal block)?
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Get the contents of the bullet (the paragraph text starts right
    # after the bullet match ends).
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # If this is the second line, set the paragraph indentation, or
        # end the token, as appropriate.
        if para_indent == None: para_indent = indent

        # A change in indentation ends the token
        if indent != para_indent: break

        # Go on to the next line.
        linenum += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Add the paragraph token (joining the remainder of the first line
    # with any continuation lines into a single normalized line).
    pcontents = ([lines[start][para_start:].strip()] +
                 [line.strip() for line in lines[start+1:linenum]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return linenum
| |
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}. C{para_indent}
    should be the indentation of the paragraph . Any errors
    generated while tokenizing the paragraph will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}. This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing. Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    doublecolon = 0
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines end paragraphs
        if indent == len(line): break

        # Indentation changes end paragraphs
        if indent != para_indent: break

        # List bullets end paragraphs
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items.
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, linenum, is_fatal=0))

        # Go on to the next line.
        linenum += 1

    contents = [line.strip() for line in lines[start:linenum]]

    # Does this token look like a heading?  A heading is a line
    # followed by an underline made of a single heading character,
    # of roughly (within 5 characters of) the same length.
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            # The underline character determines the heading depth.
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the linenum after it ends.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return linenum
| |
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Scan through the lines, determining what @type of token we're
    # dealing with, and tokenizing it, as appropriate.
    linenum = 0
    while linenum < len(lines):
        # Get the current line and its indentation.
        line = lines[linenum]
        indent = len(line)-len(line.lstrip())

        if indent == len(line):
            # Ignore blank lines.
            linenum += 1
            continue
        elif line[indent:indent+4] == '>>> ':
            # blocks starting with ">>> " are doctest block tokens.
            linenum = _tokenize_doctest(lines, linenum, indent,
                                        tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # blocks starting with a bullet are LI start tokens.
            linenum = _tokenize_listart(lines, linenum, indent,
                                        tokens, errors)
            # The list-item tokenizer may report the indentation of the
            # item's own text (after the bullet); adopt it so that a
            # following "::" literal block is measured against the
            # item's body, not the bullet.
            if tokens[-1].indent != None:
                indent = tokens[-1].indent
        else:
            # Check for mal-formatted field items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, linenum, is_fatal=0))

            # anything else is either a paragraph or a heading.
            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)

        # Paragraph tokens ending in '::' initiate literal blocks.
        # Drop one of the two colons (so the output keeps a single ':')
        # and tokenize the indented literal block that follows.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)

    return tokens
| |
| |
| ################################################## |
| ## Inline markup ("colorizing") |
| ################################################## |
| |
# Assorted regular expressions used for colorizing.  Both patterns are
# raw strings: '\s' in a plain string literal is an invalid escape
# sequence (a SyntaxWarning, and eventually an error, on modern Python).
_BRACE_RE = re.compile(r'{|}')
# Matches an optional link name followed by an explicit "<target>",
# with an optional "URI:"/"URL:" prefix inside the angle brackets.
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
| |
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The DOM document being constructed (passed through to
        the graph and link colorizers; unused directly here).
    @param token: The token whose contents should be colorized.
    @type token: L{Token}
    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of C{string}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @rtype: C{Element}
    """
    str = token.contents
    linenum = 0  # NOTE(review): appears unused in this function.

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # Tagged region, e.g. "B{...}".  The capital letter is
                # the tag, so it is excluded from the preceding text.
                if (end-1) > start:
                    stack[-1].children.append(str[start:end-1])
                if str[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(Element(tag))
            else:
                if end > start:
                    stack[-1].children.append(str[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(str[start:end])

            # Special handling for symbols:
            if stack[-1].tag == 'symbol':
                # An S{...} element must contain exactly one text child
                # naming a known symbol code.
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements:
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements:
            # replace the element with "{" + children + "}".
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs:
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].children.append(str[start:])

    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
| |
| GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph'] |
| |
def _colorize_graph(doc, graph, token, end, errors):
    """
    Validate and normalize the contents of a C{G{...}} graph element.
    On success, C{graph.children} is replaced by the graph type
    followed by its arguments; on failure, an error is appended to
    C{errors} and the children are set to C{['none', '']}.

    Eg::
      G{classtree}
      G{classtree x, y, z}
      G{importgraph}

    @param graph: The C{graph} element to process (modified in place).
    @param token: The token being colorized (for error reporting).
    @param end: The character index of the element's close brace.
    @param errors: List that any L{ColorizingError}s are appended to.
    """
    bad_graph_spec = False

    children = graph.children[:]
    graph.children = []

    if len(children) != 1 or not isinstance(children[0], basestring):
        bad_graph_spec = "Bad graph specification"
    else:
        pieces = children[0].split(None, 1)
        graphtype = pieces[0].replace(':','').strip().lower()
        if graphtype in GRAPH_TYPES:
            if len(pieces) == 2:
                # The pattern must be anchored at the end of the string:
                # unanchored, re.match() always succeeds here (the
                # pattern can match the empty prefix), which made the
                # "Bad graph arg list" branch unreachable.
                if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*$', pieces[1]):
                    args = pieces[1].replace(',', ' ').replace(':','').split()
                else:
                    bad_graph_spec = "Bad graph arg list"
            else:
                args = []
        else:
            bad_graph_spec = ("Bad graph type %s -- use one of %s" %
                              (pieces[0], ', '.join(GRAPH_TYPES)))

    if bad_graph_spec:
        errors.append(ColorizingError(bad_graph_spec, token, end))
        # Fall back to a harmless placeholder graph.
        graph.children.append('none')
        graph.children.append('')
        return

    graph.children.append(graphtype)
    for arg in args:
        graph.children.append(arg)
| |
def _colorize_link(doc, link, token, end, errors):
    """
    Split the contents of a link-type element (C{L{...}} or C{U{...}})
    into a C{name} child and a C{target} child, normalizing the target
    and reporting any malformed specifications via C{errors}.
    """
    parts = link.children[:]

    # The target specification must end with plain text.
    if not parts or not isinstance(parts[-1], basestring):
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    m = _TARGET_RE.match(parts[-1])
    if m:
        # Explicit "name <target>" form.
        (text, target) = m.groups()
        parts[-1] = text
    elif len(parts) == 1:
        # A single bare string doubles as both name and target.
        target = parts[0]
    else:
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    # Construct the name element.
    name_elt = Element('name', *parts)

    # Clean up the target.  For URIs, assume http or mailto if they
    # don't specify (no relative urls)
    target = re.sub(r'\s', '', target)
    if link.tag == 'uri':
        if not re.match(r'\w+:', target):
            if re.match(r'\w+@(\w+)(\.\w+)*', target):
                target = 'mailto:' + target
            else:
                target = 'http://' + target
    elif link.tag == 'link':
        # Remove arg lists for functions (e.g., L{_colorize_link()})
        target = re.sub(r'\(.*\)$', '', target)
        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
            errors.append(ColorizingError("Bad link target.", token, end))
            return

    # Replace the element's children with the name/target pair.
    link.children = [name_elt, Element('target', target)]
| |
| ################################################## |
| ## Formatters |
| ################################################## |
| |
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaf: temporarily encode literal braces as the control
    # characters \0 and \1; the enclosing block's branch below turns
    # them into E{lb}/E{rb} (or back into real braces in literal text).
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The \2
    # character is the marker prepended by the literalblock branch below.
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, then re-escape anything that would parse
        # as markup, and restore protected braces as E{lb}/E{rb}.
        str = wordwrap(childstr, indent)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        # Literal text: protected braces become real braces again.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['  '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # The leading \2 marker lets the "::" fixup above find this block.
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # A field is a tag child, zero or more arg children, and a body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        # Pure containers: pass the rendered children through unchanged.
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: recover the single-letter tag for this element.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| |
# Plaintext renderings for symbol codes (E{...}) that have a sensible
# ASCII form; any symbol not listed here renders as its code.
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }
| |
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Render a DOM document encoding epytext as plain text.  The output
    resembles what C{to_epytext} produces, but inline markup is
    dropped and escaped characters appear in their unescaped form.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        return tree

    if tree.tag == 'section':
        seclevel += 1

    # Figure out the child indent level.
    if tree.tag == 'epytext':
        cindent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        cindent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        cindent = indent + 2

    parts = [to_plaintext(child, cindent, seclevel)
             for child in tree.children]
    body = ''.join(parts)

    tag = tree.tag
    if tag == 'para':
        return wordwrap(body, indent)+'\n'
    elif tag == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + body.lstrip()
    elif tag == 'heading':
        margin = (indent-2)*' '
        underline = len(body)*_HEADING_CHARS[seclevel-1]
        return margin + body + '\n' + margin + underline + '\n'
    elif tag == 'doctestblock':
        indented = [(indent+2)*' '+line for line in body.split('\n')]
        return '\n'.join(indented) + '\n\n'
    elif tag == 'literalblock':
        indented = [(indent+1)*' '+line for line in body.split('\n')]
        return '\n'.join(indented) + '\n\n'
    elif tag == 'fieldlist':
        return body
    elif tag == 'field':
        # A field is a tag child, zero or more arg children, and a body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg':
            numargs += 1
        args = parts[1:1+numargs]
        fieldbody = parts[1+numargs:]
        result = indent*' ' + '@' + parts[0]
        if args:
            result += '(' + ', '.join(args) + ')'
        return result + ':\n' + ''.join(fieldbody)
    elif tag == 'uri':
        if len(parts) != 2:
            raise ValueError('Bad URI ')
        elif parts[0] == parts[1]:
            return '<%s>' % parts[1]
        else:
            return '%r<%s>' % (parts[0], parts[1])
    elif tag == 'link':
        if len(parts) != 2:
            raise ValueError('Bad Link')
        return '%s' % parts[0]
    elif tag in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in parts:
        #    if child.count('\n') > 2: return body
        return body.replace('\n\n', '\n')+'\n'
    elif tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(body, body)
    elif tag == 'graph':
        return '<<%s graph: %s>>' % (parts[0], ', '.join(parts[1:]))
    else:
        # Assume that anything else can be passed through.
        return body
| |
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.  Each line is
    prefixed with a 6-character margin label (e.g. "   P>|" for the
    first line of a paragraph, "     |" for continuation lines).

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaf: protect literal braces as \0/\1 (see to_epytext).
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back)
    childstr = re.sub(':( *\n     \|\n)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Escape markup-significant characters (cf. to_epytext), then
        # label the first line "P>" in the margin.
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        # Replace the first item's margin label with "LIST>|".
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # The leading \2 marker lets the "::" fixup above find this block.
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'field':
        # A field is a tag child, zero or more arg children, and a body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| |
| ################################################## |
| ## Top-Level Wrapper function |
| ################################################## |
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Initialize warnings up front: if parse() raises, the reporting
    # code below would otherwise hit a NameError on the unbound name,
    # masking the original exception.
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except Exception:
        # Remember that the parser crashed; re-raise below, after the
        # collected errors have been reported.  (Was a bare "except:",
        # which also swallowed KeyboardInterrupt/SystemExit.)
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
| |
| ################################################## |
| ## Parse Errors |
| ################################################## |
| |
class TokenizationError(ParseError):
    """
    An error generated while splitting a formatted documentation
    string into tokens.
    """
| |
class StructuringError(ParseError):
    """
    An error generated while assembling the tokens of a formatted
    documentation string into a document structure.
    """
| |
class ColorizingError(ParseError):
    """
    An error generated while applying inline markup ("colorizing") to
    a paragraph.  It records the token and character position where
    the problem occurred, so C{descr} can show an excerpt.
    """
    # Number of characters of context shown on each side of the error
    # position in the descr() excerpt.
    CONTEXT_RANGE = 20

    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    def descr(self):
        """
        Return the error description, followed by an excerpt of the
        token's contents with a caret marking the error position.
        """
        window = self.CONTEXT_RANGE
        contents = self.token.contents
        pos = self.charnum

        if pos <= window:
            left = contents[0:pos]
        else:
            left = '...' + contents[pos-window:pos]
        if (len(contents) - pos) <= window:
            right = contents[pos:]
        else:
            right = contents[pos:pos+window] + '...'
        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
| |
| ################################################## |
| ## Convenience parsers |
| ################################################## |
| |
def parse_as_literal(str):
    """
    Wrap the given string in a DOM document (matching the epytext DTD)
    that consists of a single literal block.  This is typically used
    as a fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: C{Element}
    """
    literal = Element('literalblock', str)
    return Element('epytext', literal)
| |
def parse_as_para(str):
    """
    Wrap the given string in a DOM document (matching the epytext DTD)
    that consists of a single paragraph.  This can be used to wrap
    some forms of automatically generated information (such as type
    names) in paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: C{Element}
    """
    para = Element('para', str)
    return Element('epytext', para)
| |
| ################################################################# |
| ## SUPPORT FOR EPYDOC |
| ################################################################# |
| |
def parse_docstring(docstring, errors, **options):
    """
    Parse the given epytext-formatted docstring, and return a
    C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree, **options)
| |
| class ParsedEpytextDocstring(ParsedDocstring): |
    # Mapping from epytext symbol codes (E{...}) to the HTML character
    # references used when rendering to HTML.
    SYMBOL_TO_HTML = {
    # Symbols
    '<-': '&larr;', '->': '&rarr;', '^': '&uarr;', 'v': '&darr;',

    # Greek letters
    'alpha': '&alpha;', 'beta': '&beta;', 'gamma': '&gamma;',
    'delta': '&delta;', 'epsilon': '&epsilon;', 'zeta': '&zeta;',
    'eta': '&eta;', 'theta': '&theta;', 'iota': '&iota;',
    'kappa': '&kappa;', 'lambda': '&lambda;', 'mu': '&mu;',
    'nu': '&nu;', 'xi': '&xi;', 'omicron': '&omicron;',
    'pi': '&pi;', 'rho': '&rho;', 'sigma': '&sigma;',
    'tau': '&tau;', 'upsilon': '&upsilon;', 'phi': '&phi;',
    'chi': '&chi;', 'psi': '&psi;', 'omega': '&omega;',
    'Alpha': '&Alpha;', 'Beta': '&Beta;', 'Gamma': '&Gamma;',
    'Delta': '&Delta;', 'Epsilon': '&Epsilon;', 'Zeta': '&Zeta;',
    'Eta': '&Eta;', 'Theta': '&Theta;', 'Iota': '&Iota;',
    'Kappa': '&Kappa;', 'Lambda': '&Lambda;', 'Mu': '&Mu;',
    'Nu': '&Nu;', 'Xi': '&Xi;', 'Omicron': '&Omicron;',
    'Pi': '&Pi;', 'Rho': '&Rho;', 'Sigma': '&Sigma;',
    'Tau': '&Tau;', 'Upsilon': '&Upsilon;', 'Phi': '&Phi;',
    'Chi': '&Chi;', 'Psi': '&Psi;', 'Omega': '&Omega;',

    # HTML character entities
    'larr': '&larr;', 'rarr': '&rarr;', 'uarr': '&uarr;',
    'darr': '&darr;', 'harr': '&harr;', 'crarr': '&crarr;',
    'lArr': '&lArr;', 'rArr': '&rArr;', 'uArr': '&uArr;',
    'dArr': '&dArr;', 'hArr': '&hArr;',
    'copy': '&copy;', 'times': '&times;', 'forall': '&forall;',
    'exist': '&exist;', 'part': '&part;',
    'empty': '&empty;', 'isin': '&isin;', 'notin': '&notin;',
    'ni': '&ni;', 'prod': '&prod;', 'sum': '&sum;',
    'prop': '&prop;', 'infin': '&infin;', 'ang': '&ang;',
    'and': '&and;', 'or': '&or;', 'cap': '&cap;', 'cup': '&cup;',
    'int': '&int;', 'there4': '&there4;', 'sim': '&sim;',
    'cong': '&cong;', 'asymp': '&asymp;', 'ne': '&ne;',
    'equiv': '&equiv;', 'le': '&le;', 'ge': '&ge;',
    'sub': '&sub;', 'sup': '&sup;', 'nsub': '&nsub;',
    'sube': '&sube;', 'supe': '&supe;', 'oplus': '&oplus;',
    'otimes': '&otimes;', 'perp': '&perp;',

    # Alternate (long) names
    'infinity': '&infin;', 'integral': '&int;', 'product': '&prod;',
    '<=': '&le;', '>=': '&ge;',
    }
| |
| SYMBOL_TO_LATEX = { |
| # Symbols |
| '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', |
| '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', |
| |
| # Greek letters (use lower case when upcase not available) |
| |
| 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': |
| r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': |
| r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', |
| 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': |
| r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', |
| 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': |
| r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': |
| r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', |
| 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': |
| r'\(\omega\)', |
| |
| 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': |
| r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': |
| r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', |
| 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': |
| r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', |
| 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': |
| r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': |
| r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', |
| 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': |
| r'\(\Omega\)', |
| |
| # HTML character entities |
| 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': |
| r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': |
| r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', |
| 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': |
| r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': |
| r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', |
| 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': |
| r'\(\exists\)', 'part': r'\(\partial\)', 'empty': |
| r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', |
| 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)', |
| 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': |
| r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': |
| r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': |
| r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', |
| 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': |
| r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': |
| r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', |
| 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': |
| r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', |
| |
| # Alternate (long) names |
| 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': |
| r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', |
| } |
| |
| def __init__(self, dom_tree, **options): |
| self._tree = dom_tree |
| # Caching: |
| self._html = self._latex = self._plaintext = None |
| self._terms = None |
| # inline option -- mark top-level children as inline. |
| if options.get('inline') and self._tree is not None: |
| for elt in self._tree.children: |
| elt.attribs['inline'] = True |
| |
| def __str__(self): |
| return str(self._tree) |
| |
| def to_html(self, docstring_linker, directory=None, docindex=None, |
| context=None, **options): |
| if self._html is not None: return self._html |
| if self._tree is None: return '' |
| indent = options.get('indent', 0) |
| self._html = self._to_html(self._tree, docstring_linker, directory, |
| docindex, context, indent) |
| return self._html |
| |
| def to_latex(self, docstring_linker, **options): |
| if self._latex is not None: return self._latex |
| if self._tree is None: return '' |
| indent = options.get('indent', 0) |
| self._hyperref = options.get('hyperref', 1) |
| self._latex = self._to_latex(self._tree, docstring_linker, indent) |
| return self._latex |
| |
| def to_plaintext(self, docstring_linker, **options): |
| # [XX] don't cache -- different options might be used!! |
| #if self._plaintext is not None: return self._plaintext |
| if self._tree is None: return '' |
| if 'indent' in options: |
| self._plaintext = to_plaintext(self._tree, |
| indent=options['indent']) |
| else: |
| self._plaintext = to_plaintext(self._tree) |
| return self._plaintext |
| |
| def _index_term_key(self, tree): |
| str = to_plaintext(tree) |
| str = re.sub(r'\s\s+', '-', str) |
| return "index-"+re.sub("[^a-zA-Z0-9]", "_", str) |
| |
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """Recursively convert an epytext DOM node to an HTML string.

        C{tree} is either a plain string (a text leaf, which is escaped)
        or an Element.  C{linker} resolves cross-references and index
        terms; C{directory} is where generated graph images are written.
        C{indent} is the current output indentation and C{seclevel} the
        current section nesting depth (used to pick <hN> heading levels).
        """
        # Text leaves just get HTML-escaped.
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        # The root element adds no indentation; each nested section
        # bumps the heading level by one.
        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Process the variables first (depth-first rendering of children).
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tag == 'para':
            # Inline paragraphs are rendered without the <p>...</p> wrapper.
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            # An optional 'style' attribute becomes the CSS class.
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            # Children are (text, target).
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            # Children are (name, target); the linker resolves the xref.
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            # Re-wrap the term as a standalone docstring for the linker.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            # Honor an explicit numbering start, if one was given.
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            # The section nesting depth selects the <hN> level.
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            # The doctest colorizer produces its own markup.
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            # Field lists are extracted by split_fields() before rendering.
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            # Structural containers contribute only their children's HTML.
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph image next to the HTML output.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
| |
| #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph'] |
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        """Construct a dot graph object for a C{graph} element.

        C{graph_type} is one of 'classtree', 'packagetree',
        'importgraph' or 'callgraph'; C{graph_args} are the remaining
        (already-rendered) children of the graph element.  Returns
        None, after logging a warning, when the graph cannot be built.
        """
        # Generate the graph
        if graph_type == 'classtree':
            # Imports are deferred, presumably to avoid import cycles.
            from epydoc.apidoc import ClassDoc
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                # No explicit bases: default to the containing class.
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                # No explicit roots: default to the containing module.
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            # The import graph always covers every documented module.
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                # Drop names the index could not resolve.
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)
| |
| |
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        """Recursively convert an epytext DOM node to a LaTeX string.

        C{tree} is either a plain string (a text leaf, which is escaped)
        or an Element.  C{breakany} is passed through to
        C{plaintext_to_latex} to allow line breaks at any character.
        Assumes C{self._hyperref} was set by C{to_latex()}.
        """
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Figure out the child indent level.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            # Children are (text, target).
            if len(variables) != 2: raise ValueError('Bad URI ')
            if self._hyperref:
                # ~ and # should not be escaped in the URI.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    # Bare URI: hyperlink the URI text itself.
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    # Named URI: text inline, linked target in a footnote.
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            # Math regions are rendered simply as italic text.
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            # Re-wrap the term as a standalone docstring for the linker.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            # Field lists should have been split off; note the omission.
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Graphs are not supported in the LaTeX output.
            return '(GRAPH)'
            #raise ValueError, 'graph not implemented yet for latex'
        else:
            # Assume that anything else can be passed through.
            return childstr
| |
    # First sentence: everything up to (and including) the first period
    # that is followed by whitespace or end-of-string.
    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    def summary(self):
        """Return C{(summary, is_long)}, where C{summary} is a
        C{ParsedEpytextDocstring} holding the docstring's first
        sentence, and C{is_long} is true if the docstring contains
        further non-field content beyond that sentence.
        """
        # With no tree, the (empty) docstring is its own summary.
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

        # Find the first paragraph.
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                # Descend into structural containers looking for a para.
                variables = variables[0].children
            else:
                variables = variables[1:]

        # Special case: if the docstring contains a single literal block,
        # then try extracting the summary from it.
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            # Take the literal text up to the first blank line.
            str = re.split(r'\n\s*(\n|$).*',
                           tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(str)

        # If we didn't find a paragraph, return an empty epytext.
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

        # Is there anything else, excluding tags, after the first variable?
        long_docs = False
        for var in variables[1:]:
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

        # Extract the first sentence.
        parachildren = variables[0].children
        para = Element('para', inline=True)
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = self._SUMMARY_RE.match(parachild)
                if m:
                    para.children.append(m.group(1))
                    # Any sibling after this text node makes it "long".
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        # Non-whitespace text after the period => long.
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            # No sentence boundary yet: keep accumulating children.
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs
| |
    def split_fields(self, errors=None):
        """Split this docstring into its description and its fields.

        Returns C{(description, fields)}, where C{description} is a
        C{ParsedEpytextDocstring} (or None when no body text remains)
        and C{fields} is a list of L{Field} objects.  The top-level
        child list is copied before the field list is removed, but the
        field elements themselves are reused and modified in place.
        C{errors} is accepted for interface compatibility and unused.
        """
        if self._tree is None: return (self, ())
        # Copy the root element so its child list can be edited safely.
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        # A field list, if present, is expected as the last child.
        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # Get the tag (lower-cased, e.g. 'param', 'return').
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # Get the argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Process the field: its remaining children become the
                # body of a standalone docstring.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # Save the remaining docstring as the description..
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
| |
| |
| def index_terms(self): |
| if self._terms is None: |
| self._terms = [] |
| self._index_terms(self._tree, self._terms) |
| return self._terms |
| |
    def _index_terms(self, tree, terms):
        """Recursively collect the X{indexed} terms found in C{tree},
        appending each to the list C{terms} as a
        C{ParsedEpytextDocstring}."""
        # Text leaves (and a missing tree) contain no index markup.
        if tree is None or isinstance(tree, basestring):
            return

        if tree.tag == 'indexed':
            # Re-wrap the term's contents as a standalone docstring.
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

        # Look for index items in child nodes.
        for child in tree.children:
            self._index_terms(child, terms)