| # |
| # javadoc.py: javadoc docstring parsing |
| # Edward Loper |
| # |
| # Created [07/03/03 12:37 PM] |
| # $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $ |
| # |
| |
| """ |
| Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>} |
| docstrings. Javadoc is an HTML-based markup language that was |
| developed for documenting Java APIs with inline comments. It consists |
| of raw HTML, augmented by Javadoc tags. There are two types of |
| Javadoc tag: |
| |
| - X{Javadoc block tags} correspond to Epydoc fields. They are |
| marked by starting a line with a string of the form \"C{@M{tag} |
| [M{arg}]}\", where C{M{tag}} indicates the type of block, and |
| C{M{arg}} is an optional argument. (For fields that take |
| arguments, Javadoc assumes that the single word immediately |
| following the tag is an argument; multi-word arguments cannot be |
| used with javadoc.) |
| |
| - X{inline Javadoc tags} are used for inline markup. In particular, |
| epydoc uses them for crossreference links between documentation. |
| Inline tags may appear anywhere in the text, and have the form |
| \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the |
| type of inline markup, and C{M{args}} are optional arguments. |
| |
| Epydoc supports all Javadoc tags, I{except}: |
| - C{{@docRoot}}, which gives the (relative) URL of the generated |
| documentation's root. |
| - C{{@inheritDoc}}, which copies the documentation of the nearest |
| overridden object. This can be used to combine the documentation |
| of the overridden object with the documentation of the |
| overriding object. |
| - C{@serial}, C{@serialField}, and C{@serialData} which describe the |
| serialization (pickling) of an object. |
| - C{{@value}}, which copies the value of a constant. |
| |
| @warning: Epydoc only supports HTML output for Javadoc docstrings. |
| """ |
| __docformat__ = 'epytext en' |
| |
| # Imports |
| import re |
| from xml.dom.minidom import * |
| from epydoc.markup import * |
| |
def parse_docstring(docstring, errors, **options):
    """
    Build the C{ParsedDocstring} representation of a docstring that
    is written in Javadoc markup.

    @param docstring: The Javadoc-formatted docstring to parse.
    @type docstring: C{string}
    @param errors: A list that receives any errors generated while
        parsing.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  None are currently defined, so
        anything passed here is accepted and ignored.
    @rtype: L{ParsedDocstring}
    """
    # All of the work (including link validation, which is what
    # populates ``errors``) happens in the constructor.
    parsed = ParsedJavadocDocstring(docstring, errors)
    return parsed
| |
class ParsedJavadocDocstring(ParsedDocstring):
    """
    An encoded version of a Javadoc docstring.  Since Javadoc is a
    fairly simple markup language, we don't do any processing in
    advance; instead, we wait to split fields or resolve
    crossreference links until we need to.  The only work done at
    construction time is validating the inline links.

    @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
    @cvar _ARG_FIELDS: A list of the fields that take arguments.
        Since Javadoc doesn't mark arguments in any special way, we
        must consult this list to decide whether the first word of a
        field is an argument or not.
    @cvar _FIELD_RE: A regular expression used to search for Javadoc
        block tags.  It has exactly one capturing group, so that
        C{split()} returns alternating text and tag pieces.

    @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
    @cvar _LINK_SPLIT_RE: A regular expression used to search for
        Javadoc inline tags.
    @cvar _LINK_RE: A regular expression used to process Javadoc
        inline tags.  Group 1 is the link target; group 2 is the
        optional label (with its leading whitespace).

    @group Plaintext Output: to_plaintext, summary, _SUMMARY_RE
    @cvar _SUMMARY_RE: A regular expression that matches the text up
        to and including the first period that is followed by
        whitespace or by the end of the text.
    """
    def __init__(self, docstring, errors=None):
        """
        Create a new C{ParsedJavadocDocstring}.

        @param docstring: The docstring that should be used to
            construct this C{ParsedJavadocDocstring}.
        @type docstring: C{string}
        @param errors: A list where any errors generated during
            parsing will be stored.  If no list is given, then
            all errors are ignored.
        @type errors: C{list} of L{ParseError}
        """
        self._docstring = docstring
        # Links are always validated; without a caller-supplied list
        # the errors are collected into a throwaway list.
        if errors is None:
            errors = []
        self._check_links(errors)

    #////////////////////////////////////////////////////////////
    # Field Splitting
    #////////////////////////////////////////////////////////////

    _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
                   'ivar param '+
                   'parameter arg argument raise raises exception '+
                   'except deffield newfield keyword kwarg kwparam').split()

    # A block tag is terminated by whitespace *or* by the end of the
    # docstring.  (Inside a character class, "$" is a literal dollar
    # sign, so "[\s$]" could not recognize a tag that ends the text.)
    _FIELD_RE = re.compile(r'(^\s*@\w+(?:\s|$))', re.MULTILINE)

    def split_fields(self, errors=None):
        """
        Split this docstring into its description and its block
        fields.

        @param errors: Accepted for compatibility with the
            C{ParsedDocstring} interface; it is not used here.  Link
            errors are reported when the docstring is constructed.
        @return: A tuple C{(descr, fields)}.  C{descr} is a
            C{ParsedJavadocDocstring} for the text before the first
            block tag, or C{None} if that text is blank.  C{fields}
            is a list of C{Field}s, one per block tag, in order.
        @rtype: C{tuple}
        """
        # _FIELD_RE has a single capturing group, so split() yields
        # [description, tag, text, tag, text, ...]: tags sit at the
        # odd indices and each one is followed by its text.
        pieces = self._FIELD_RE.split(self._docstring)

        fields = []
        for raw_tag, text in zip(pieces[1::2], pieces[2::2]):
            # Drop the surrounding whitespace and the leading "@".
            tag = raw_tag.strip()[1:]

            # Get the field argument (if appropriate).  Padding with
            # empty strings keeps the unpacking safe when the field
            # has no argument and/or no body.
            if tag in self._ARG_FIELDS:
                subpieces = text.strip().split(None, 1) + ['', '']
                (arg, body) = subpieces[:2]
            else:
                (arg, body) = (None, text)

            # Special processing for @see fields, since Epydoc
            # allows unrestricted text in them, but Javadoc just
            # uses them for xref links.
            if tag == 'see':
                # Strip first: the raw text normally ends with the
                # newline that precedes the next tag, which would
                # otherwise defeat the closing-quote test and leak
                # into the generated link.
                body = body.strip()
                if not body:
                    pass
                elif body[0] in '"\'':
                    # Quoted text: a plain string, not a link.
                    if body[-1] == body[0]:
                        body = body[1:-1]
                elif body[0] != '<':
                    # Anything that is not raw HTML is a reference.
                    body = '{@link %s}' % body

            # Construct the field.
            fields.append(Field(tag, arg, ParsedJavadocDocstring(body)))

        if pieces[0].strip():
            return (ParsedJavadocDocstring(pieces[0]), fields)
        return (None, fields)

    #////////////////////////////////////////////////////////////
    # HTML Output.
    #////////////////////////////////////////////////////////////

    _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')
    # DOTALL lets the label wrap across lines; _LINK_SPLIT_RE already
    # allows newlines inside a link, so the two must agree.
    _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
                          r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}',
                          re.DOTALL)

    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.  The text is emitted
        unchanged, except that each C{{@link}} / C{{@linkplain}}
        inline tag is replaced by the result of
        C{docstring_linker.translate_identifier_xref(target, label)}.
        Malformed links (which C{_check_links} reports) are omitted.

        @param docstring_linker: The object used to resolve
            crossreference links; only its
            C{translate_identifier_xref} method is used.
        @param options: Ignored.
        @rtype: C{string}
        """
        # Split the docstring into an alternating list of HTML and
        # links (odd pieces are links).
        pieces = self._LINK_SPLIT_RE.split(self._docstring)

        # This function is used to translate {@link ...}s to HTML.
        translate_xref = docstring_linker.translate_identifier_xref

        html = []
        for i, piece in enumerate(pieces):
            if i % 2 == 0:
                # Raw HTML: pass it through untouched.
                html.append(piece)
                continue

            # Decompose the link into pieces.
            m = self._LINK_RE.match(piece)
            if m is None:
                continue  # Error flagged by _check_links
            (target, name) = m.groups()

            # Normalize the target name: "#member" -> "member" and
            # "Class#member" -> "Class.member".  (Any "(...)"
            # argument list is matched outside of the target group,
            # so it never needs to be removed here.)
            if target.startswith('#'):
                target = target[1:]
            target = target.replace('#', '.')

            # Provide a name, if it wasn't specified (or if it
            # consists of nothing but whitespace).
            name = (name or '').strip() or target

            # Use docstring_linker to convert the name to html.
            html.append(translate_xref(target, name))
        return ''.join(html)

    def _check_links(self, errors):
        """
        Make sure that all C{{@link}} tags are valid.  We need a
        separate method for this because we want to do this at parse
        time, not html output time.  Any errors found are appended to
        C{errors} as non-fatal L{ParseError}s.

        @param errors: The list that receives the errors.
        @type errors: C{list} of L{ParseError}
        """
        pieces = self._LINK_SPLIT_RE.split(self._docstring)
        # Number of newlines seen before the current piece; this is
        # the line offset reported with each error.
        linenum = 0
        for i, piece in enumerate(pieces):
            # Odd pieces are the links themselves.
            if i % 2 == 1 and not self._LINK_RE.match(piece):
                estr = 'Bad link %r' % piece
                errors.append(ParseError(estr, linenum, is_fatal=0))
            linenum += piece.count('\n')

    #////////////////////////////////////////////////////////////
    # Plaintext Output.
    #////////////////////////////////////////////////////////////

    # Since we don't define to_latex, this is used when generating
    # latex output.
    def to_plaintext(self, docstring_linker, **options):
        """
        Return the raw docstring text, with the Javadoc/HTML markup
        left in place.

        @param docstring_linker: Ignored.
        @param options: Ignored.
        @rtype: C{string}
        """
        return self._docstring

    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    # Jeff's hack to get summary working
    def summary(self):
        """
        Return a short summary of this docstring.

        @return: A tuple C{(summary, other)}, where C{summary} is a
            C{ParsedJavadocDocstring} holding the first sentence (or,
            if no sentence end is found, the first line), and
            C{other} is true if the docstring contains any text
            beyond that summary.
        @rtype: C{tuple}
        """
        # Drop tags.  Only the lines that start with "@" are removed;
        # continuation lines of a multi-line tag body are kept.
        doc = "\n".join([row for row in self._docstring.split('\n')
                         if not row.lstrip().startswith('@')])

        m = self._SUMMARY_RE.match(doc)
        if m:
            # There is more text if anything other than whitespace
            # follows the first sentence.
            other = doc[m.end():]
            return (ParsedJavadocDocstring(m.group(1)),
                    bool(other.strip()))

        # No sentence-ending period: fall back to the first line, and
        # mark the truncation with an ellipsis if more lines follow.
        parts = doc.strip('\n').split('\n', 1)
        if len(parts) == 1:
            return (ParsedJavadocDocstring(parts[0]), False)
        return (ParsedJavadocDocstring(parts[0] + '...'), True)

#     def concatenate(self, other):
#         if not isinstance(other, ParsedJavadocDocstring):
#             raise ValueError, 'Could not concatenate docstrings'
#         return ParsedJavadocDocstring(self._docstring+other._docstring)