| # epydoc -- Source code parsing |
| # |
| # Copyright (C) 2005 Edward Loper |
| # Author: Edward Loper <[email protected]> |
| # URL: <http://epydoc.sf.net> |
| # |
| # $Id: docparser.py 1673 2008-01-29 05:42:58Z edloper $ |
| |
| """ |
| Extract API documentation about python objects by parsing their source |
| code. |
| |
| The function L{parse_docs()}, which provides the main interface |
| of this module, reads and parses the Python source code for a |
| module, and uses it to create an L{APIDoc} object containing |
| the API documentation for the variables and values defined in |
| that module. |
| |
| Currently, C{parse_docs()} extracts documentation from the following |
| source code constructions: |
| |
| - module docstring |
| - import statements |
| - class definition blocks |
| - function definition blocks |
| - assignment statements |
| - simple assignment statements |
| - assignment statements with multiple C{'='}s |
| - assignment statements with unpacked left-hand sides |
| - assignment statements that wrap a function in classmethod |
| or staticmethod. |
| - assignment to special variables __path__, __all__, and |
| __docformat__. |
| - delete statements |
| |
| C{parse_docs()} does not yet support the following source code |
| constructions: |
| |
| - assignment statements that create properties |
| |
| By default, C{parse_docs()} will explore the contents of top-level |
| C{try} and C{if} blocks. If desired, C{parse_docs()} can also |
| be configured to explore the contents of C{while} and C{for} blocks. |
| (See the configuration constants, below.) |
| |
| @todo: Make it possible to extend the functionality of C{parse_docs()}, |
| by replacing process_line with a dispatch table that can be |
| customized (similarly to C{docintrospector.register_introspector()}). |
| """ |
| __docformat__ = 'epytext en' |
| |
| ###################################################################### |
| ## Imports |
| ###################################################################### |
| |
| # Python source code parsing: |
| import token, tokenize |
| # Finding modules: |
| import imp |
| # File services: |
| import os, os.path, sys |
| # Unicode: |
| import codecs |
| # API documentation encoding: |
| from epydoc.apidoc import * |
| # For looking up the docs of builtins: |
| import __builtin__, exceptions |
| import epydoc.docintrospecter |
| # Misc utility functions: |
| from epydoc.util import * |
| # Backwards compatibility |
| from epydoc.compat import * |
| |
| ###################################################################### |
| ## Doc Parser |
| ###################################################################### |
| |
class ParseError(Exception):
    """
    Raised when C{docparser} finds syntactically invalid Python code
    in a source file that it is processing.
    """
| |
| _moduledoc_cache = {} |
| """A cache of C{ModuleDoc}s that we've already created. |
| C{_moduledoc_cache} is a dictionary mapping from filenames to |
| C{ValueDoc} objects. |
| @type: C{dict}""" |
| |
| #//////////////////////////////////////////////////////////// |
| # Configuration Constants |
| #//////////////////////////////////////////////////////////// |
| |
#{ Configuration Constants: Control Flow
PARSE_TRY_BLOCKS = True
"""If true, then the contents of C{try} blocks are examined."""
PARSE_EXCEPT_BLOCKS = True
"""If true, then the contents of C{except} blocks are examined."""
PARSE_FINALLY_BLOCKS = True
"""If true, then the contents of C{finally} blocks are examined."""
PARSE_IF_BLOCKS = True
"""If true, then the contents of C{if} blocks are examined."""
PARSE_ELSE_BLOCKS = True
"""If true, then the contents of C{else} and C{elif} blocks are
examined."""
PARSE_WHILE_BLOCKS = False
"""If true, then the contents of C{while} blocks are examined."""
PARSE_FOR_BLOCKS = False
"""If true, then the contents of C{for} blocks are examined."""
| |
#{ Configuration Constants: Imports
IMPORT_HANDLING = 'link'
"""Controls what C{docparser} does when it encounters an import
statement:
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object.  (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

IMPORT_STAR_HANDLING = 'parse'
"""Controls how C{docparser} determines the list of identifiers
exported by C{M{m}} when it encounters a C{'from M{m} import *'}
statement and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed):
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports.  (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports.  (This will
    fall back to the 'ignore' behavior if the imported file can't
    be parsed, e.g., if it's a builtin.)
"""

DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
"""Controls what C{docparser} does to the documentation of a
decorated function when it encounters an unknown decorator:
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

BASE_HANDLING = 'parse'  # alternative value: 'link'
"""Controls what C{docparser} does when it encounters a base class
that was imported from another module:
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it.  (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""
| |
#{ Configuration Constants: Comment docstrings
COMMENT_DOCSTRING_MARKER = '#:'
"""The prefix that marks a comment as containing an attribute
docstring for a variable."""

#{ Configuration Constants: Grouping
START_GROUP_MARKER = '#{'
"""The prefix that marks a comment as the start of a group.  The
marker should be followed (on the same line) by the name of the
group.  After a start-group comment, every variable defined at the
same indentation level is assigned to that group, until the parser
reaches the end of the file, a matching end-group comment, or another
start-group comment at the same indentation level.
"""

END_GROUP_MARKER = '#}'
"""The prefix that marks a comment as the end of a group.  See
L{START_GROUP_MARKER}."""
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Module parser |
| #///////////////////////////////////////////////////////////////// |
| |
def parse_docs(filename=None, name=None, context=None, is_script=False):
    """
    Build the API documentation for a single object by parsing Python
    source files, and return it as a L{ValueDoc}.  The object is
    selected with I{either} the C{filename} parameter I{or} the
    C{name} parameter; giving both of them, or neither of them, is
    an error.

    @param filename: The name of the file that contains the python
        source code for a package, module, or script.  If
        C{filename} is specified, then C{parse_docs} will return a
        C{ModuleDoc} describing its contents.
    @param name: The fully-qualified python dotted name of any
        value (including packages, modules, classes, and
        functions).  C{parse_docs()} will automatically figure out
        which module(s) it needs to parse in order to find the
        documentation for the specified object.
    @param context: The API documentation for the package that
        contains C{filename}.  If no context is given, then the
        directory containing C{filename} is checked to see whether
        it is a package directory.  It is an error to specify a
        C{context} if the C{name} argument is used.
    @param is_script: If true, then C{filename} is used exactly as
        given (it is not mapped to a python source filename), no
        containing package is looked up, and the module's name is
        derived from the filename with C{munge_script_name()}.
    @rtype: L{ValueDoc}
    @raise ValueError: If both or neither of C{filename} and
        C{name} are given; or if C{context} is given with C{name}.
    @raise ImportError: If C{py_src_filename()} rejects C{filename}.
    @raise ParseError: If tokenizing the file fails with a
        C{TokenError} or an C{IndentationError}.
    """
    # Always introspect __builtins__ & exceptions (e.g., in case
    # they're used as base classes.)
    epydoc.docintrospecter.introspect_docs(__builtin__)
    epydoc.docintrospecter.introspect_docs(exceptions)

    # Exactly one of (filename, name) must be supplied.
    if (filename is None) == (name is None):
        raise ValueError("Expected exactly one of the following "
                         "arguments: name, filename")

    # Input is a python object name: delegate the search to _find().
    if name is not None:
        if context:
            raise ValueError("context should only be specified together "
                             "with filename, not with name.")
        dotted_name = DottedName(name)
        val_doc = _find(dotted_name)
        if val_doc.canonical_name is UNKNOWN:
            val_doc.canonical_name = dotted_name
        return val_doc

    # Input is a filename: create a ModuleDoc for it, and let
    # process_file() populate its attributes.

    # Use a python source version, if possible.  (The exception is
    # fetched with sys.exc_info() rather than bound in the except
    # clause, so that the syntax does not depend on the interpreter
    # version.)
    if not is_script:
        try:
            filename = py_src_filename(filename)
        except ValueError:
            raise ImportError('%s' % sys.exc_info()[1])

    # If this file was already parsed, then reuse the result.
    try:
        return _moduledoc_cache[filename]
    except KeyError:
        pass

    log.info("Parsing %s" % filename)

    # If the context wasn't provided, then check if the file is in a
    # package directory; if so, parse the enclosing package(s) to
    # get the context.  (This assumes the default value of __path__
    # for the parent packages; if the parent packages override their
    # __path__s, then this can cause us not to find the value.)
    if context is None and not is_script:
        basedir, basename = os.path.split(filename)
        if os.path.splitext(basename)[0] == '__init__':
            # A package's __init__ file: the package's parent is the
            # directory that contains the package directory.
            basedir = os.path.split(basedir)[0]
        context = _parse_package(basedir)

    # Figure out the canonical name of the module we're parsing.
    if is_script:
        module_name = DottedName(munge_script_name(filename))
        is_pkg = False
    else:
        module_name, is_pkg = _get_module_name(filename, context)

    # Create a new ModuleDoc for the module, & add it to the cache
    # *before* processing the file.
    module_doc = ModuleDoc(canonical_name=module_name, variables={},
                           sort_spec=[], imports=[],
                           filename=filename, package=context,
                           is_package=is_pkg, submodules=[],
                           docs_extracted_by='parser')
    module_doc.defining_module = module_doc
    _moduledoc_cache[filename] = module_doc

    # Set the module's __path__ to its default value.
    if is_pkg:
        module_doc.path = [os.path.split(module_doc.filename)[0]]

    # Add this module to the parent package's list of submodules.
    if context is not None:
        context.submodules.append(module_doc)

    # Tokenize & process the contents of the module's source file,
    # converting tokenizer failures into ParseErrors.
    try:
        process_file(module_doc)
    except tokenize.TokenError:
        msg, (srow, scol) = sys.exc_info()[1].args
        raise ParseError('Error during parsing: %s '
                         '(%s, line %d, char %d)' %
                         (msg, module_doc.filename, srow, scol))
    except IndentationError:
        raise ParseError('Error during parsing: %s (%s)' %
                         (sys.exc_info()[1], module_doc.filename))

    # Handle any special variables (__path__, __docformat__, etc.)
    handle_special_module_vars(module_doc)

    # Return the completed ModuleDoc
    return module_doc
| |
def _parse_package(package_dir):
    """
    Parse the C{__init__} file of the package in C{package_dir},
    after first parsing the C{__init__} files of all of its ancestor
    packages, and return the package's C{ModuleDoc}.

    @return: The C{ModuleDoc} for the package; or C{None} if
        C{package_dir} is not a package directory.
    """
    if is_package_dir(package_dir):
        # Ancestors first, so the parent's doc can be used as the
        # context for this package.
        parent_doc = _parse_package(os.path.dirname(package_dir))
        init_file = os.path.join(package_dir, '__init__')
        return parse_docs(filename=init_file, context=parent_doc)
    return None
| |
| # Special vars: |
| # C{__docformat__}, C{__all__}, and C{__path__}. |
def handle_special_module_vars(module_doc):
    """
    Interpret the special module-level variables C{__docformat__},
    C{__all__}, and C{__path__}, if C{module_doc} defines them with a
    known token tree, and then remove them from
    C{module_doc.variables}:

      - C{__docformat__}: its string value is stored in
        C{module_doc.docformat}.
      - C{__all__}: its list of names is used to set C{is_public}
        on every variable of the module.
      - C{__path__} (packages only): its list of strings is stored
        in C{module_doc.path}.

    Values that can not be parsed are ignored (best effort), but the
    variable is removed from C{module_doc.variables} either way.

    @param module_doc: The C{ModuleDoc} to update, in place.
    """
    # If __docformat__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__docformat__')
    if toktree is not None:
        try:
            module_doc.docformat = parse_string(toktree)
        except Exception:
            # Best effort: an unparseable __docformat__ is ignored.
            # (Not a bare "except:", so that KeyboardInterrupt and
            # SystemExit are not swallowed.)
            pass
        del module_doc.variables['__docformat__']

    # If __all__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__all__')
    if toktree is not None:
        try:
            public_names = set(parse_string_list(toktree))
            for name, var_doc in module_doc.variables.items():
                if name in public_names:
                    var_doc.is_public = True
                    # NOTE(review): this isinstance() test is applied
                    # to the entry of module_doc.variables itself,
                    # not to its value -- confirm that is intended.
                    if not isinstance(var_doc, ModuleDoc):
                        var_doc.is_imported = False
                else:
                    var_doc.is_public = False
        except ParseError:
            # If we couldn't parse the list, give precedence to
            # introspection.
            for name, var_doc in module_doc.variables.items():
                if not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = UNKNOWN
        del module_doc.variables['__all__']

    # If __path__ is defined, then extract its value (pkgs only)
    if module_doc.is_package:
        toktree = _module_var_toktree(module_doc, '__path__')
        if toktree is not None:
            try:
                module_doc.path = parse_string_list(toktree)
            except ParseError:
                pass # [xx]
            del module_doc.variables['__path__']
| |
| def _module_var_toktree(module_doc, name): |
| var_doc = module_doc.variables.get(name) |
| if (var_doc is None or var_doc.value in (None, UNKNOWN) or |
| var_doc.value.toktree is UNKNOWN): |
| return None |
| else: |
| return var_doc.value.toktree |
| |
| #//////////////////////////////////////////////////////////// |
| #{ Module Lookup |
| #//////////////////////////////////////////////////////////// |
| |
def _find(name, package_doc=None):
    """
    Return the API documentation for the object whose name is
    C{name}.  C{package_doc}, if specified, is the API
    documentation for the package containing the named object.

    @raise ImportError: If the named object can not be found.
    """
    # If we're inside a package, then search the package's path
    # (falling back to the directory of the package's file when the
    # path is not known); otherwise, use the default search path.
    path = None
    if package_doc is not None:
        path = package_doc.path
        if path is UNKNOWN:
            path = [os.path.dirname(package_doc.filename)]

    # The leftmost identifier in `name` should be a module or
    # package on the given path; find it and parse it.
    module_doc = parse_docs(_get_filename(name[0], path),
                            context=package_doc)

    # A single identifier names the module itself.
    if len(name) == 1:
        return module_doc

    # Otherwise, we're looking for something inside the module.
    # First, check to see if it's in a variable (but ignore
    # variables that just contain imported submodules).
    if not _is_submodule_import_var(module_doc, name[1]):
        try:
            return _find_in_namespace(name[1:], module_doc)
        except ImportError:
            pass

    # If not, then check to see if it's in a subpackage.
    if module_doc.is_package:
        return _find(name[1:], module_doc)

    # It's not in a variable or a subpackage: give up.
    raise ImportError('Could not find value')
| |
def _is_submodule_import_var(module_doc, var_name):
    """
    Check whether C{var_name} names a variable in C{module_doc}
    that just contains an C{imported_from} link to a submodule of
    the same name.  (I.e., a variable created when a package imports
    one of its own submodules.)

    @return: True if the variable exists and its C{imported_from}
        name equals C{module_doc}'s canonical name extended with
        C{var_name}.
    """
    var_doc = module_doc.variables.get(var_name)
    submodule_name = DottedName(module_doc.canonical_name, var_name)
    if var_doc is None:
        return False
    return var_doc.imported_from == submodule_name
| |
| def _find_in_namespace(name, namespace_doc): |
| if name[0] not in namespace_doc.variables: |
| raise ImportError('Could not find value') |
| |
| # Look up the variable in the namespace. |
| var_doc = namespace_doc.variables[name[0]] |
| if var_doc.value is UNKNOWN: |
| raise ImportError('Could not find value') |
| val_doc = var_doc.value |
| |
| # If the variable's value was imported, then follow its |
| # alias link. |
| if var_doc.imported_from not in (None, UNKNOWN): |
| return _find(var_doc.imported_from+name[1:]) |
| |
| # Otherwise, if the name has one identifier, then this is the |
| # value we're looking for; return it. |
| elif len(name) == 1: |
| return val_doc |
| |
| # Otherwise, if this value is a namespace, look inside it. |
| elif isinstance(val_doc, NamespaceDoc): |
| return _find_in_namespace(name[1:], val_doc) |
| |
| # Otherwise, we ran into a dead end. |
| else: |
| raise ImportError('Could not find value') |
| |
def _get_filename(identifier, path=None):
    """
    Return the filename of the Python source file for the module or
    package named C{identifier}, as located by C{imp.find_module()}.

    @param identifier: The (undotted) name of the module or package.
    @param path: The list of directories to search, passed on to
        C{imp.find_module()}.  C{None} and C{UNKNOWN} both select
        the default search path.
    @return: For a source module, its filename; for a compiled
        module, the filename of the corresponding C{.py} file; for
        a package, the filename of its C{__init__.py} (or
        C{__init__.pyw}) file.
    @raise ImportError: If the module can not be found, or if no
        Python source file is available for it (e.g., for builtin
        modules and C extensions).
    """
    if path is UNKNOWN: path = None
    try:
        fp, filename, (s,m,typ) = imp.find_module(identifier, path)
        # Only the filename is needed; don't leak the open file.
        if fp is not None: fp.close()
    except ImportError:
        raise ImportError('No Python source file found.')

    if typ == imp.PY_SOURCE:
        return filename
    elif typ == imp.PY_COMPILED:
        # See if we can find a corresponding non-compiled version.
        # (The dot is escaped so that only a real ".py?" extension
        # is rewritten.)
        filename = re.sub(r'\.py\w$', '.py', filename)
        if not os.path.exists(filename):
            raise ImportError('No Python source file found.')
        return filename
    elif typ == imp.PKG_DIRECTORY:
        # Try each possible init file relative to the package
        # directory itself.  (Joining '__init__.pyw' onto the path
        # of a missing '__init__.py' could never match.)
        for init_name in ('__init__.py', '__init__.pyw'):
            init_file = os.path.join(filename, init_name)
            if os.path.exists(init_file):
                return init_file
        raise ImportError('No package file found.')
    elif typ == imp.C_BUILTIN:
        raise ImportError('No Python source file for builtin modules.')
    elif typ == imp.C_EXTENSION:
        raise ImportError('No Python source file for c extensions.')
    else:
        raise ImportError('No Python source file found.')
| |
| #///////////////////////////////////////////////////////////////// |
| #{ File tokenization loop |
| #///////////////////////////////////////////////////////////////// |
| |
def process_file(module_doc):
    """
    Read the given C{ModuleDoc}'s file, and add variables
    corresponding to any objects defined in that file.  In
    particular, read and tokenize C{module_doc.filename}, and
    process each logical line using L{process_line()}.

    The file is opened with the encoding reported by
    C{get_module_encoding()}; if that encoding is unknown,
    iso-8859-1 is used instead (with a warning).  The file is closed
    again before this function returns or raises.

    @param module_doc: The C{ModuleDoc} to populate, in place.
    @raise ParseError: If the tokenizer produces an error token (other
        than a byte-order mark), or if L{process_line()} raises a
        C{ParseError} for a logical line.
    """
    # Keep track of the current line number:
    lineno = None

    # Use this list to collect the tokens on a single logical line:
    line_toks = []

    # This list contains one APIDoc for each indentation level.
    # The first element is the APIDoc for the module, and each
    # subsequent element is the APIDoc for the object at that
    # indentation level.  The final element of the list is the
    # C{APIDoc} for the entity that we're currently processing.
    parent_docs = [module_doc]

    # The APIDoc for the object that was defined by the previous
    # line, if any; or None otherwise.  This is used to update
    # parent_docs when we encounter an indent; and to decide what
    # object (if any) is described by a docstring.
    prev_line_doc = module_doc

    # A list of comments that occur before or on the current
    # logical line, used to build the comment docstring.  Each
    # element is a two-element list [comment_text, comment_lineno].
    comments = []

    # A list of decorator lines that occur before the current
    # logical line.  This is used so we can process a function
    # declaration line and its decorators all at once.
    decorators = []

    # A list of group names, one for each indentation level.  This is
    # used to keep track groups that are defined by comment markers
    # START_GROUP_MARKER and END_GROUP_MARKER.
    groups = [None]

    # When we encounter a comment start group marker, set this to the
    # name of the group; but wait until we're ready to process the
    # next line before we actually set groups[-1] to this value.  This
    # is necessary because at the top of a block, the tokenizer gives
    # us comments before the INDENT token; but if we encounter a group
    # start marker at the top of a block, then we want it to apply
    # inside that block, not outside it.
    start_group = None

    # Check if the source file declares an encoding.
    encoding = get_module_encoding(module_doc.filename)

    # Open the file, falling back to iso-8859-1 for unknown encodings.
    try:
        module_file = codecs.open(module_doc.filename, 'rU', encoding)
    except LookupError:
        log.warning("Unknown encoding %r for %s; using the default "
                    "encoding instead (iso-8859-1)" %
                    (encoding, module_doc.filename))
        encoding = 'iso-8859-1'
        module_file = codecs.open(module_doc.filename, 'rU', encoding)

    # The token-eating loop.  (The try/finally guarantees that the
    # source file is closed, even if parsing fails.)
    try:
        tok_iter = tokenize.generate_tokens(module_file.readline)
        for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter:
            # BOM encoding marker: ignore.
            if (toktype == token.ERRORTOKEN and
                (toktext == u'\ufeff' or
                 toktext.encode(encoding) == '\xef\xbb\xbf')):
                pass

            # Error token: abort
            elif toktype == token.ERRORTOKEN:
                raise ParseError('Error during parsing: invalid syntax '
                                 '(%s, line %d, char %d: %r)' %
                                 (module_doc.filename, srow, scol, toktext))

            # Indent token: update the parent_doc stack.
            elif toktype == token.INDENT:
                if prev_line_doc is None:
                    parent_docs.append(parent_docs[-1])
                else:
                    parent_docs.append(prev_line_doc)
                groups.append(None)

            # Dedent token: update the parent_doc stack.
            elif toktype == token.DEDENT:
                if line_toks == []:
                    parent_docs.pop()
                    groups.pop()
                else:
                    # This *should* only happen if the file ends on an
                    # indented line, with no final newline.
                    # (otherwise, this is the wrong thing to do.)
                    pass

            # Line-internal newline token: if we're still at the start of
            # the logical line, and we've seen one or more comment lines,
            # then discard them: blank lines are not allowed between a
            # comment block and the thing it describes.
            elif toktype == tokenize.NL:
                if comments and not line_toks:
                    log.warning('Ignoring docstring comment block followed by '
                                'a blank line in %r on line %r' %
                                (module_doc.filename, srow-1))
                    comments = []

            # Comment token: add to comments if appropriate.
            elif toktype == tokenize.COMMENT:
                if toktext.startswith(COMMENT_DOCSTRING_MARKER):
                    comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip()
                    if comment_line.startswith(" "):
                        comment_line = comment_line[1:]
                    comments.append( [comment_line, srow])
                elif toktext.startswith(START_GROUP_MARKER):
                    start_group = toktext[len(START_GROUP_MARKER):].strip()
                elif toktext.startswith(END_GROUP_MARKER):
                    # Close the innermost group that is currently open.
                    for i in range(len(groups)-1, -1, -1):
                        if groups[i]:
                            groups[i] = None
                            break
                    else:
                        log.warning("Got group end marker without a corresponding "
                                    "start marker in %r on line %r" %
                                    (module_doc.filename, srow))

            # Normal token: Add it to line_toks.  (If it's a non-unicode
            # string literal, then we need to re-encode using the file's
            # encoding, to get back to the original 8-bit data; and then
            # convert that string with 8-bit data to a 7-bit ascii
            # representation.)
            elif toktype != token.NEWLINE and toktype != token.ENDMARKER:
                if lineno is None: lineno = srow
                if toktype == token.STRING:
                    str_prefixes = re.match('[^\'"]*', toktext).group()
                    if 'u' not in str_prefixes:
                        s = toktext.encode(encoding)
                        toktext = decode_with_backslashreplace(s)
                line_toks.append( (toktype, toktext) )

            # Decorator line: add it to the decorators list.
            elif line_toks and line_toks[0] == (token.OP, '@'):
                decorators.append(shallow_parse(line_toks))
                line_toks = []

            # End of line token, but nothing to do.
            elif line_toks == []:
                pass

            # End of line token: parse the logical line & process it.
            else:
                if start_group:
                    groups[-1] = start_group
                    start_group = None

                if parent_docs[-1] != 'skip_block':
                    # (The exceptions are fetched with sys.exc_info()
                    # rather than bound in the except clauses, so that
                    # the syntax does not depend on the interpreter
                    # version.)
                    try:
                        prev_line_doc = process_line(
                            shallow_parse(line_toks), parent_docs,
                            prev_line_doc, lineno, comments, decorators,
                            encoding)
                    except ParseError:
                        raise ParseError('Error during parsing: invalid '
                                         'syntax (%s, line %d) -- %s' %
                                         (module_doc.filename, lineno,
                                          sys.exc_info()[1]))
                    except KeyboardInterrupt:
                        raise
                    except Exception:
                        log.error('Internal error during parsing (%s, line '
                                  '%s):\n%s' % (module_doc.filename, lineno,
                                                sys.exc_info()[1]))
                        raise

                    # grouping...
                    if groups[-1] and prev_line_doc not in (None, 'skip_block'):
                        if isinstance(prev_line_doc, VariableDoc):
                            # prev_line_doc's container will only be
                            # UNKNOWN if it's an instance variable that
                            # didn't have a doc-comment, but might still
                            # be followed by a docstring.  Since we
                            # tokenize in order, we can't do lookahead to
                            # see if the variable will have a comment; but
                            # it should only be added to the container if
                            # it does.  So we defer the grouping of that
                            # to be handled by process_docstring instead.
                            if prev_line_doc.container is not UNKNOWN:
                                add_to_group(prev_line_doc.container,
                                             prev_line_doc, groups[-1])
                        elif isinstance(parent_docs[-1], NamespaceDoc):
                            add_to_group(parent_docs[-1], prev_line_doc,
                                         groups[-1])
                else:
                    prev_line_doc = None

                # Reset line contents.
                line_toks = []
                lineno = None
                comments = []
                decorators = []
    finally:
        module_file.close()
| |
def add_to_group(container, api_doc, group_name):
    """
    Record in C{container.group_specs} that C{api_doc} belongs to
    the group C{group_name}.  C{group_specs} is a list of
    C{(group_name, variable_names)} pairs; the name is appended to
    the first entry for C{group_name}, or to a new entry at the end
    of the list if there is none yet.

    @param container: The object whose C{group_specs} is updated.
    @param api_doc: The object to add.  For a C{VariableDoc}, its
        C{name} is recorded; for anything else, the last identifier
        of its C{canonical_name} is recorded.
    @param group_name: The name of the group.
    """
    if container.group_specs is UNKNOWN:
        container.group_specs = []

    # Decide which name to record for api_doc.
    if not isinstance(api_doc, VariableDoc):
        if api_doc.canonical_name is UNKNOWN:
            log.debug('ouch', repr(api_doc))
        var_name = api_doc.canonical_name[-1]
    else:
        var_name = api_doc.name

    # Extend the existing entry for this group, if there is one...
    for (spec_name, spec_vars) in container.group_specs:
        if spec_name == group_name:
            spec_vars.append(var_name)
            return

    # ...otherwise, start a new entry.
    container.group_specs.append( (group_name, [var_name]) )
| |
def script_guard(line):
    """
    Detect the idiomatic trick C{if __name__ == "__main__":}.

    @param line: The tokens of a logical line, as C{(toktype,
        toktext)} pairs.
    @return: True if the line consists of exactly the five tokens
        C{if}, C{__name__}, C{==}, a token whose text (minus its
        first and last characters) is C{__main__}, and C{:}.
    """
    if len(line) != 5:
        return False
    # Tested first, because this is the most selective check.
    if line[1][1] != '__name__':
        return False
    if line[0][1] != 'if':
        return False
    if line[2][1] != '==':
        return False
    if line[4][1] != ':':
        return False
    # Strip the quote characters from the string literal.
    return line[3][1][1:-1] == '__main__'
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Shallow parser |
| #///////////////////////////////////////////////////////////////// |
| |
def shallow_parse(line_toks):
    """
    Given a flat list of tokens, return a nested tree structure
    (called a X{token tree}), whose leaves are identical to the
    original list, but whose structure reflects the structure
    implied by the grouping tokens (i.e., parentheses, braces, and
    brackets).  Each bracketed group becomes a sub-list that starts
    with its opening token and ends with its closing token.

    @param line_toks: A list of C{(toktype, toktext)} pairs.
    @raise ParseError: If the parentheses, braces, and brackets do
        not match, or are not balanced.
    """
    openers = {')': '(', ']': '[', '}': '{'}

    # stack[0] is the tree being built; every further element is a
    # group that is still open, and starts with its opening token.
    stack = [[]]
    for tok in line_toks:
        toktext = tok[1]
        if toktext in ('(', '[', '{'):
            stack.append([tok])
        elif toktext in openers:
            if len(stack) == 1:
                raise ParseError('Unbalanced parens')
            group = stack.pop()
            if group[0][1] != openers[toktext]:
                raise ParseError('Mismatched parens')
            group.append(tok)
            stack[-1].append(group)
        else:
            stack[-1].append(tok)

    # Any group that is still open was never closed.
    if len(stack) != 1:
        raise ParseError('Unbalanced parens')
    return stack[0]
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Line processing |
| #///////////////////////////////////////////////////////////////// |
| # The methods process_*() are used to handle lines. |
| |
def process_line(line, parent_docs, prev_line_doc, lineno,
                 comments, decorators, encoding):
    """
    Dispatch a single logical line, given as a token tree, to the
    line handler for the kind of statement that it contains.  All
    of the arguments are passed through to the handler unchanged.

    @return: Whatever the selected line handler returns; or C{None}
        if the line is empty or does not match any of the statement
        forms that are recognized.
    """
    if not line: # blank line.
        return None

    handler_args = (line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding)
    first = line[0]

    # The order of the tests below is significant: the first one
    # that matches decides which handler is used.
    if (token.OP, ':') in line[:-1]:
        return process_one_line_block(*handler_args)
    if (token.OP, ';') in line:
        return process_multi_stmt(*handler_args)
    if first == (token.NAME, 'def') or first == (token.OP, '@'):
        return process_funcdef(*handler_args)
    if first == (token.NAME, 'class'):
        return process_classdef(*handler_args)
    if first == (token.NAME, 'import'):
        return process_import(*handler_args)
    if first == (token.NAME, 'from'):
        return process_from_import(*handler_args)
    if first == (token.NAME, 'del'):
        return process_del(*handler_args)
    if len(line) == 1 and first[0] == token.STRING:
        return process_docstring(*handler_args)
    if (token.OP, '=') in line:
        return process_assignment(*handler_args)
    if first[0] == token.NAME and first[1] in CONTROL_FLOW_KEYWORDS:
        return process_control_flow_line(*handler_args)

    # Anything else is ignored.
    # [xx] do something with control structures like for/if?
    return None
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: control flow |
| #///////////////////////////////////////////////////////////////// |
| |
#: A list of the control flow keywords.  If a line begins with
#: one of these keywords, then it should be handled by
#: C{process_control_flow_line}.
CONTROL_FLOW_KEYWORDS = [
    'if', 'elif', 'else',
    'while', 'for',
    'try', 'except', 'finally',
]
| |
def process_control_flow_line(line, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding):
    """
    Handle a line that begins with a control flow keyword, by
    deciding whether the block that it introduces should be
    examined.  That decision is controlled by the C{PARSE_*_BLOCKS}
    configuration constants; in addition, an C{if} block is never
    examined when its line is a script guard (see
    L{script_guard()}).  For C{for} lines, a variable is created for
    the loop variable if C{PARSE_FOR_BLOCKS} is true.

    @return: C{None}, to indicate that the block should be
        processed using the same context that we were already in;
        or C{'skip_block'}, to indicate that the contents of the
        block should be ignored.
    """
    keyword = line[0][1]

    # If it's a 'for' block: create the loop variable.
    if keyword == 'for' and PARSE_FOR_BLOCKS:
        loopvar_toks = split_on(line[1:], (token.NAME, 'in'))[0]
        loopvar_name = parse_dotted_name(loopvar_toks)
        parent = get_lhs_parent(loopvar_name, parent_docs)
        if parent is not None:
            var_doc = VariableDoc(name=loopvar_name[-1], is_alias=False,
                                  is_imported=False, is_instvar=False,
                                  docs_extracted_by='parser')
            set_variable(parent, var_doc)

    # Look up the configuration setting for this kind of block.
    if keyword == 'if':
        explore = PARSE_IF_BLOCKS and not script_guard(line)
    elif keyword == 'elif' or keyword == 'else':
        explore = PARSE_ELSE_BLOCKS
    elif keyword == 'while':
        explore = PARSE_WHILE_BLOCKS
    elif keyword == 'for':
        explore = PARSE_FOR_BLOCKS
    elif keyword == 'try':
        explore = PARSE_TRY_BLOCKS
    elif keyword == 'except':
        explore = PARSE_EXCEPT_BLOCKS
    elif keyword == 'finally':
        explore = PARSE_FINALLY_BLOCKS
    else:
        explore = False

    if explore:
        return None
    return 'skip_block'
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: imports |
| #///////////////////////////////////////////////////////////////// |
| # [xx] I could optionally add ValueDoc's for the imported |
| # variables with proxy_for set to the imported source; but |
| # I don't think I gain much of anything by doing so. |
| |
def process_import(line, parent_docs, prev_line_doc, lineno,
                   comments, decorators, encoding):
    """
    Handle an C{import} statement, by registering each imported
    name with C{_import_var()} (for C{import a.b}) or
    C{_import_var_as()} (for C{import a.b as c}).  The statement is
    ignored unless the innermost parent is a C{NamespaceDoc}.

    @raise ParseError: If a clause contains more than one C{as}, or
        if C{as} is not followed by exactly one token.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc):
        return

    # Each comma-separated clause imports one name.
    for clause in split_on(line[1:], (token.OP, ',')):
        pieces = split_on(clause, (token.NAME, 'as'))
        if len(pieces) not in (1, 2):
            raise ParseError('Multiple "as" tokens in import')

        # import a.b
        if len(pieces) == 1:
            _import_var(parse_dotted_name(pieces[0]), parent_docs)
            continue

        # import a.b as c
        source_toks, alias_toks = pieces
        if len(alias_toks) != 1:
            raise ParseError('Expected identifier after "as"')
        src_name = parse_dotted_name(source_toks)
        var_name = parse_name(alias_toks[0])
        _import_var_as(src_name, var_name, parent_docs)
| |
def process_from_import(line, parent_docs, prev_line_doc, lineno,
                        comments, decorators, encoding):
    """
    The line handler for from-import statements, such as:

        >>> from os.path import join, split as s
        >>> from sys import *
        >>> from __future__ import nested_scopes

    C{__future__} imports are ignored; star imports are passed to
    L{_process_fromstar_import}; every other imported name is passed
    to L{_import_var_as}.  If the innermost context
    (C{parent_docs[-1]}) is not a C{NamespaceDoc}, then the line is
    ignored.

    @return: C{None}
    @raise ParseError: If the statement does not have the form
        C{from M{lhs} import M{rhs}}, or if one of the imported
        names is neither C{M{x}} nor C{M{x} as M{y}}.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    pieces = split_on(line[1:], (token.NAME, 'import'))
    if len(pieces) != 2 or not pieces[0] or not pieces[1]:
        raise ParseError("Bad from-import")
    lhs, rhs = pieces

    # The RHS might be parenthesized, as specified by PEP 328:
    # http://www.python.org/peps/pep-0328.html
    if (len(rhs) == 1 and isinstance(rhs[0], list) and
        rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')):
        rhs = rhs[0][1:-1]

    # >>> from __future__ import nested_scopes
    if lhs == [(token.NAME, '__future__')]:
        return

    # >>> from sys import *
    elif rhs == [(token.OP, '*')]:
        src_name = parse_dotted_name(lhs)
        _process_fromstar_import(src_name, parent_docs)

    # >>> from os.path import join, split
    else:
        # Allow relative imports in this case, as per PEP 328
        src_name = parse_dotted_name(lhs,
                        parent_name=parent_docs[-1].canonical_name)
        parts = split_on(rhs, (token.OP, ','))
        for part in parts:
            # from m import x
            if len(part) == 1:
                var_name = parse_name(part[0])
                _import_var_as(DottedName(src_name, var_name),
                               var_name, parent_docs)

            # from m import x as y
            elif len(part) == 3 and part[1] == (token.NAME, 'as'):
                orig_name = parse_name(part[0])
                var_name = parse_name(part[2])
                _import_var_as(DottedName(src_name, orig_name),
                               var_name, parent_docs)

            else:
                # The exception used to be constructed but never
                # raised, so malformed clauses were silently ignored.
                raise ParseError("Bad from-import")
| |
def _process_fromstar_import(src, parent_docs):
    """
    Handle a statement of the form:

        >>> from <src> import *

    If L{IMPORT_HANDLING} or L{IMPORT_STAR_HANDLING} is C{'parse'},
    then try to find the docs for the module C{M{<src>}}, and create
    an imported variable in C{parent_docs[-1]} for each of its public
    variables whose name is not of the form C{__M{x}__}.  The
    variables' values are copied as well when L{IMPORT_HANDLING} is
    C{'parse'}.

    If L{IMPORT_STAR_HANDLING} is C{'introspect'}, then the list of
    exports is found by importing and introspecting C{M{<src>}}, and
    an imported variable is created for each export.

    @return: C{None}
    """
    # This is redundant: already checked by caller.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # Record the import on the module being parsed.
    parent_docs[0].imports.append(src) # mark that it's .*??

    # [xx] add check for if we already have the source docs in our
    # cache??

    if (IMPORT_HANDLING == 'parse' or
        IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok?
        try:
            module_doc = _find(src)
        except ImportError:
            module_doc = None
        if isinstance(module_doc, ModuleDoc):
            for name, imp_var in module_doc.variables.items():
                if not imp_var.is_public:
                    continue
                # [xx] this is not exactly correct, but close.  It
                # does the wrong thing if a __var__ is explicitly
                # listed in __all__.
                if name.startswith('__') and name.endswith('__'):
                    continue
                var_doc = _add_import_var(DottedName(src, name), name,
                                          namespace)
                if IMPORT_HANDLING == 'parse':
                    var_doc.value = imp_var.value

    # The introspection pass below is independent of whether the
    # parsing pass above found anything.
    if IMPORT_STAR_HANDLING != 'introspect':
        return

    # Importing arbitrary code can fail in arbitrary ways, so any
    # failure at all is treated as "we couldn't import it".
    try:
        module = __import__(str(src), {}, {}, [0])
    except:
        return
    if module is None:
        return

    if hasattr(module, '__all__'):
        exported = list(module.__all__)
    else:
        exported = [n for n in dir(module) if not n.startswith('_')]
    for name in exported:
        _add_import_var(DottedName(src, name), name, namespace)
| |
def _import_var(name, parent_docs):
    """
    Handle a statement of the form:

        >>> import <name>

    If L{IMPORT_HANDLING} is C{'parse'}, then first try to find the
    imported value's docs; if they are found, then create a variable
    for the top-level module C{name[0]} in C{parent_docs[-1]}.

    Otherwise, add a variable for the imported name.  More than one
    variable may be created for cases like C{'import a.b'}, where we
    need to create a variable C{'a'} in C{parent_docs[-1]} containing
    a proxy module, and a variable C{'b'} in that proxy module.

    @return: C{None}
    """
    # This is redundant: already checked by caller.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If name is package-local, then convert it to a global name;
    # src_prefix is whatever package prefix that conversion added.
    src = _global_name(name, parent_docs)
    src_prefix = src[:len(src)-len(name)]

    # Record the import (under the name used in the source file).
    parent_docs[0].imports.append(name)

    # [xx] add check for if we already have the source docs in our
    # cache??

    if IMPORT_HANDLING == 'parse':
        # Check to make sure that we can actually find the value.
        try:
            val_doc = _find(src)
        except ImportError:
            val_doc = None
        if val_doc is not None:
            # We found it; but it's not the value itself we want to
            # import, but the module containing it; so import that
            # module (=top_mod) and create a variable for it.
            top_identifier = name[0]
            top_mod = src_prefix+top_identifier
            var_doc = _add_import_var(top_mod, top_identifier, namespace)
            var_doc.value = _find(DottedName(top_identifier))
            return

    # If we got here, then either IMPORT_HANDLING='link', or we
    # did not successfully find the value's docs by parsing; use
    # a variable with an UNKNOWN value.

    # Walk down through the leading identifiers, creating proxy
    # module values for the intermediate packages as needed.
    container = namespace
    for i, identifier in enumerate(name[:-1]):
        partial_name = name[:i+1]
        if (identifier not in container.variables or
            not isinstance(container.variables[identifier], ModuleDoc)):
            var_doc = _add_import_var(partial_name, identifier, container)
            var_doc.value = ModuleDoc(variables={}, sort_spec=[],
                                      proxy_for=src_prefix+partial_name,
                                      submodules={},
                                      docs_extracted_by='parser')
        container = container.variables[identifier].value

    # Add the variable for the last identifier to the innermost
    # container.
    _add_import_var(src, name[-1], container)
| |
def _import_var_as(src, name, parent_docs):
    """
    Handle a statement of the form:

        >>> import src as name

    If L{IMPORT_HANDLING} is C{'parse'}, then first try to find the
    docs for C{src}; if they are found, then create a variable named
    C{name} in C{parent_docs[-1]} whose value is those docs.

    Otherwise, create a variable with its C{imported_from} attribute
    pointing to the imported object.

    @return: C{None}
    """
    # This is redundant: already checked by caller.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # Record the import on the module being parsed.
    parent_docs[0].imports.append(src)

    if IMPORT_HANDLING == 'parse':
        # Look up the value's docs; a failed lookup is not an error.
        try:
            val_doc = _find(src)
        except ImportError:
            val_doc = None
        if val_doc is not None:
            imported_var = VariableDoc(name=name, value=val_doc,
                                       is_imported=True, is_alias=False,
                                       imported_from=src,
                                       docs_extracted_by='parser')
            set_variable(namespace, imported_var)
            return

    # If we got here, then either IMPORT_HANDLING='link', or we
    # did not successfully find the value's docs by parsing; use a
    # variable that only records where it was imported from.
    _add_import_var(src, name, namespace)
| |
def _add_import_var(src, name, container):
    """
    Create a new imported variable named C{name}, with
    C{imported_from=src}, and register it in C{container} via
    L{set_variable}.

    @return: The new C{VariableDoc}.
    """
    imported_var = VariableDoc(name=name, is_imported=True,
                               is_alias=False, imported_from=src,
                               docs_extracted_by='parser')
    set_variable(container, imported_var)
    return imported_var
| |
def _global_name(name, parent_docs):
    """
    If the given name is package-local (relative to the current
    context, as determined by C{parent_docs}), then convert it
    to a global name.

    @return: C{name} prefixed with the canonical name of the closest
        enclosing package that contains a module called C{name[0]};
        or C{name} itself if there is no such package.
    """
    # Start from the package containing the module being parsed (or
    # from that module itself, if it is a package).
    module_doc = parent_docs[0]
    if module_doc.is_package:
        package = module_doc
    else:
        package = module_doc.package

    # Check each package (from closest to furthest) to see if it
    # contains a module named name[0]; if so, then treat `name` as
    # relative to that package.
    while package not in (None, UNKNOWN):
        try:
            handle = imp.find_module(name[0], package.path)[0]
            # find_module may hand back an open file; we only care
            # whether the module exists, so release it right away.
            if handle is not None:
                handle.close()
        except ImportError:
            # No submodule found here; try the next package up.
            package = package.package
        else:
            # A submodule was found; return its name.
            return package.canonical_name + name

    # We didn't find any package containing `name`; so just return
    # `name` as-is.
    return name
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: assignment |
| #///////////////////////////////////////////////////////////////// |
| |
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements, such as:

        >>> x = y = 12
        >>> (a, b) = pair
        >>> self.count = 0

    A C{VariableDoc} is created for each variable named by a
    left-hand side, and is added to the namespace chosen by
    L{get_lhs_parent}.  A left-hand side that is a simple dotted name
    receives the value computed by L{rhs_to_valuedoc}; the variables
    found inside a complex left-hand side (by L{dotted_names_in})
    receive no value.  Assignments that are neither instance variable
    assignments nor made directly inside a namespace are ignored.

    @return: The new C{VariableDoc}, if the statement has a single
        left-hand side that is either a bare identifier or an
        instance variable (so that a following string literal can be
        attached to it as a docstring); C{None} otherwise.
    """
    # Divide the assignment statement into its pieces.
    pieces = split_on(line, (token.OP, '='))

    lhs_pieces = pieces[:-1]
    rhs = pieces[-1]

    # Decide whether the variable is an instance variable or not.
    # If it's an instance var, then discard the value.
    is_instvar = lhs_is_instvar(lhs_pieces, parent_docs)

    # if it's not an instance var, and we're not in a namespace,
    # then it's just a local var -- so ignore it.
    if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)):
        return None

    # Evaluate the right hand side.
    if not is_instvar:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs)
    else:
        rhs_val, is_alias = UNKNOWN, False

    # Assign the right hand side value to each left hand side.
    # (Do the rightmost assignment first)
    lhs_pieces.reverse()
    for lhs in lhs_pieces:
        # Try treating the LHS as a simple dotted name.  A copy is
        # parsed, because parse_dotted_name() may unwrap enclosing
        # parentheses in the list it is given, and the untouched
        # token tree is still needed by the complex-expression branch
        # below.
        try: lhs_name = parse_dotted_name(lhs[:])
        except Exception: lhs_name = None
        if lhs_name is not None:
            lhs_parent = get_lhs_parent(lhs_name, parent_docs)
            if lhs_parent is None: continue

            # Skip a special class variable.
            if lhs_name[-1] == '__slots__':
                continue

            # Create the VariableDoc.
            var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val,
                                  is_imported=False, is_alias=is_alias,
                                  is_instvar=is_instvar,
                                  docs_extracted_by='parser')
            # Extract a docstring from the comments, when present,
            # but only if there's a single LHS.
            if len(lhs_pieces) == 1:
                add_docstring_from_comments(var_doc, comments)

            # Assign the variable to the containing namespace,
            # *unless* the variable is an instance variable
            # without a comment docstring.  In that case, we'll
            # only want to add it if we later discover that it's
            # followed by a variable docstring.  If it is, then
            # process_docstring will take care of adding it to the
            # containing class.  (This is a little hackish, but
            # unfortunately is necessary because we won't know if
            # this assignment line is followed by a docstring
            # until later.)
            if (not is_instvar) or comments:
                set_variable(lhs_parent, var_doc, True)

            # If it's the only var, then return the VarDoc for use
            # as the new `prev_line_doc`.
            if (len(lhs_pieces) == 1 and
                (len(lhs_name) == 1 or is_instvar)):
                return var_doc

        # Otherwise, the LHS must be a complex expression; use
        # dotted_names_in() to decide what variables it contains,
        # and create VariableDoc's for all of them (with UNKNOWN
        # value).
        else:
            # dotted_names_in() consumes the list it is given, so
            # hand it a fresh list holding only this LHS.  Passing
            # `lhs_pieces` itself would empty the list this loop is
            # iterating over, and would re-register the variables of
            # every other LHS without their values.
            for lhs_name in dotted_names_in([lhs]):
                lhs_parent = get_lhs_parent(lhs_name, parent_docs)
                if lhs_parent is None: continue
                var_doc = VariableDoc(name=lhs_name[-1],
                                      is_imported=False,
                                      is_alias=is_alias,
                                      is_instvar=is_instvar,
                                      docs_extracted_by='parser')
                set_variable(lhs_parent, var_doc, True)

        # If we have multiple left-hand-sides, then all but the
        # rightmost one are considered aliases.
        is_alias = True
| |
| |
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Decide whether an assignment's left-hand side names an instance
    variable.  This is the case when the innermost context is a
    routine that lies inside a class, and the assignment has a single
    left-hand side of the form C{M{<self>}.M{<name>}}, where
    C{M{<self>}} is the routine's first positional argument and
    C{M{<name>}} is a simple name.

    @return: C{True} or C{False}.
    """
    routine = parent_docs[-1]
    if not isinstance(routine, RoutineDoc):
        return False

    posargs = routine.posargs
    if posargs is UNKNOWN:
        return False

    # There must be exactly one LHS, and a first argument to compare
    # it against.
    if len(lhs_pieces) != 1 or len(posargs) == 0:
        return False

    # The LHS must be exactly three tokens: <self> . <name>
    target = lhs_pieces[0]
    if len(target) != 3:
        return False
    if target[0] != (token.NAME, posargs[0]):
        return False
    if target[1] != (token.OP, '.'):
        return False
    if target[2][0] != token.NAME:
        return False

    # Make sure we're in an instance method, and not a
    # module-level function: walking outwards, a class must be
    # reached before any context other than the routine itself.
    for ancestor in parent_docs[::-1]:
        if isinstance(ancestor, ClassDoc):
            return True
        if ancestor != routine:
            return False
    return False
| |
def rhs_to_valuedoc(rhs, parent_docs):
    """
    Build a value for the right-hand side of an assignment.

    @return: A tuple C{(value, is_alias)}.  If C{rhs} is a dotted
        name whose value can be looked up, then that value is
        returned with C{is_alias=True}.  If C{rhs} has the form
        C{M{name}(M{arg})} and C{M{arg}} evaluates to a
        C{RoutineDoc}, then the result of L{apply_decorator} is
        returned.  Otherwise, a C{GenericValueDoc} whose repr is the
        source text of C{rhs} is returned.
    """
    # Dotted variable: the value is an alias for an existing one.
    try:
        known_val = lookup_value(parse_dotted_name(rhs), parent_docs)
    except ParseError:
        known_val = None
    if known_val is not None and known_val is not UNKNOWN:
        return known_val, True

    # Decorators: a name followed by a single bracketed group.
    looks_like_call = (len(rhs)==2 and rhs[0][0] == token.NAME and
                       isinstance(rhs[1], list))
    if looks_like_call:
        # Strip the group's brackets, and evaluate what's inside.
        arg_val = rhs_to_valuedoc(rhs[1][1:-1], parent_docs)[0]
        if isinstance(arg_val, RoutineDoc):
            wrapped = apply_decorator(DottedName(rhs[0][1]), arg_val)
            wrapped.canonical_name = UNKNOWN
            wrapped.parse_repr = pp_toktree(rhs)
            return wrapped, False

    # Nothing else to do: make a val with the source as its repr.
    generic_val = GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                                  defining_module=parent_docs[0],
                                  docs_extracted_by='parser')
    return generic_val, False
| |
def get_lhs_parent(lhs_name, parent_docs):
    """
    Find the documentation object that should contain the variable
    named by the left-hand side C{lhs_name}.

    @type lhs_name: L{DottedName}
    @return: The closest enclosing C{ClassDoc}, if the innermost
        context is a routine; otherwise the innermost context itself
        for a single-identifier name; otherwise whatever
        L{lookup_value} returns for the name's container.
    @raise ValueError: If the innermost context is a routine that is
        not enclosed by any class.
    """
    assert isinstance(lhs_name, DottedName)

    innermost = parent_docs[-1]

    # For instance vars inside a method: the owner is the closest
    # enclosing class.
    if isinstance(innermost, RoutineDoc):
        for ancestor in parent_docs[::-1]:
            if isinstance(ancestor, ClassDoc):
                return ancestor
        raise ValueError("%r is not a namespace or method" %
                         parent_docs[-1])

    # For local variables:
    if len(lhs_name) == 1:
        return innermost

    # For non-local variables:
    return lookup_value(lhs_name.container(), parent_docs)
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: single-line blocks |
| #///////////////////////////////////////////////////////////////// |
| |
def process_one_line_block(line, parent_docs, prev_line_doc, lineno,
                           comments, decorators, encoding):
    """
    The line handler for single-line blocks, such as:

        >>> def f(x): return x*2

    This handler calls L{process_line} twice: once for the tokens
    up to and including the colon, and once for the remaining
    tokens.  The comment docstring and the decorators are applied to
    the first part only; the second part is processed with the
    result of the first part as its innermost context and as its
    C{prev_line_doc}.

    @return: Whatever L{process_line} returned for the first part.
    """
    colon = line.index((token.OP, ':'))
    header_toks = line[:colon+1]
    body_toks = line[colon+1:]

    header_doc = process_line(header_toks, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding)
    # The result for the body is not needed by the caller.
    process_line(body_toks, parent_docs+[header_doc],
                 header_doc, lineno, None, [], encoding)
    return header_doc
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: semicolon-separated statements |
| #///////////////////////////////////////////////////////////////// |
| |
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    This handler calls L{process_line} once for each statement, in
    order.  Each statement sees the result of the statement before it
    as its C{prev_line_doc}.  The decorators are passed to the first
    statement only, and the comment docstring is not passed on to any
    of the sub-statements.

    @return: C{None}
    """
    last_doc = prev_line_doc
    pending_decorators = decorators
    for stmt in split_on(line, (token.OP, ';')):
        if len(stmt) == 0:
            continue
        last_doc = process_line(stmt, parent_docs, last_doc,
                                lineno, None, pending_decorators,
                                encoding)
        # Decorators only ever apply to the first statement.
        pending_decorators = []
    return None
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: delete statements |
| #///////////////////////////////////////////////////////////////// |
| |
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    This handler calls L{del_variable} for each dotted variable in
    the variable list.  The variable list may be nested.  Complex
    expressions in the variable list (such as C{x[3]}) are ignored.
    If the innermost context is not a C{NamespaceDoc}, then the
    whole statement is ignored.

    @return: C{None}
    """
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        # Not in a namespace: nothing that we track can be deleted.
        return

    # line[0] is the 'del' keyword itself.
    targets = split_on(line[1:], (token.OP, ','))
    for dotted in dotted_names_in(targets):
        del_variable(namespace, dotted)

    return None
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: docstrings |
| #///////////////////////////////////////////////////////////////// |
| |
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals.  If C{prev_line_doc}
    is not C{None}, then the string literal is installed as that
    C{APIDoc}'s docstring.  If it already has a docstring (from
    comment docstrings), then a warning is logged and the old
    docstring is replaced by the string literal.

    @return: C{prev_line_doc}, if it is an instance variable that
        was added to its class by this call; C{None} otherwise.
    """
    if prev_line_doc is None:
        return

    text = parse_string(line)

    # If the docstring is a str, then convert it to unicode.
    # According to a strict reading of PEP 263, this might not be the
    # right thing to do; but it will almost always be what the
    # module's author intended.
    if isinstance(text, str):
        try:
            text = text.decode(encoding)
        except UnicodeDecodeError:
            # If decoding failed, then fall back on using
            # decode_with_backslashreplace, which will map e.g.
            # "\xe9" -> u"\\xe9".
            text = decode_with_backslashreplace(text)
            log.warning("While parsing %s: docstring is not a unicode "
                        "string, but it contains non-ascii data." %
                        prev_line_doc.canonical_name)

    # An instance variable without a comment docstring has not been
    # added to its class's C{variables} yet: process_assignment()
    # leaves that to us, because instance variables should only be
    # recorded when they turn out to have a docstring.  Add it to the
    # closest enclosing class now.
    registered_instvar = False
    needs_registration = (isinstance(prev_line_doc, VariableDoc) and
                          prev_line_doc.is_instvar and
                          prev_line_doc.docstring in (None, UNKNOWN))
    if needs_registration:
        for owner in parent_docs[::-1]:
            if isinstance(owner, ClassDoc):
                set_variable(owner, prev_line_doc, True)
                registered_instvar = True
                break

    if prev_line_doc.docstring not in (None, UNKNOWN):
        log.warning("%s has both a comment-docstring and a normal "
                    "(string) docstring; ignoring the comment-"
                    "docstring." % prev_line_doc.canonical_name)

    prev_line_doc.docstring = text
    prev_line_doc.docstring_lineno = lineno

    # If the modified APIDoc is an instance variable, and we added it
    # to the class's variables list here, then it still needs to be
    # grouped too; so return it for use as the new "prev_line_doc."
    if registered_instvar:
        return prev_line_doc
| |
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: function declarations |
| #///////////////////////////////////////////////////////////////// |
| |
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

        >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{RoutineDoc},
    applies the pending decorators to it (innermost first), wraps the
    result in a new C{VariableDoc}, and adds that C{VariableDoc} to
    the containing namespace.  If the innermost context is not a
    C{NamespaceDoc}, then the definition is ignored.

    Note that C{decorators} is reversed in place.

    @return: The value stored in the new variable (the C{RoutineDoc},
        or whatever the decorators turned it into); or C{None} if the
        definition was ignored.
    @raise ParseError: If the line is not made up of exactly
        C{def}, a name, an argument list and a colon.
    """
    # Check syntax.
    if len(line) != 4 or line[3] != (token.OP, ':'):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # Create the function's RoutineDoc under its fully qualified name.
    func_name = parse_name(line[1])
    qualified_name = DottedName(namespace.canonical_name, func_name)
    func_doc = RoutineDoc(canonical_name=qualified_name,
                          defining_module=parent_docs[0],
                          lineno=lineno, docs_extracted_by='parser')

    # Process the signature.
    init_arglist(func_doc, line[2])

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(func_doc, comments)

    # Record the decorators' source text (without the leading '@'
    # token), in the order in which they were written.
    deco_sources = []
    for deco in decorators:
        deco_sources.append(pp_toktree(deco[1:]))
    func_doc.decorators = deco_sources

    # Apply the decorators, starting with the one closest to the def.
    decorators.reverse()
    for decorator in decorators:
        try:
            deco_name = parse_dotted_name(decorator[1:])
        except ParseError:
            deco_name = None

        # The repr has to be built from the function as it is
        # *before* this decorator is applied.
        if func_doc.canonical_name is not UNKNOWN:
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.canonical_name)
        elif func_doc.parse_repr not in (None, UNKNOWN):
            # [xx] this case should be improved.. when will func_doc
            # have a known parse_repr??
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.parse_repr)
        else:
            deco_repr = UNKNOWN

        func_doc = apply_decorator(deco_name, func_doc)
        func_doc.parse_repr = deco_repr
        # [XX] The canonical name is deliberately left alone here:
        # resetting it to UNKNOWN causes the grouping code to break.
        # Presumably the canonical name should remain valid if we're
        # just applying a standard decorator.

    # Add a variable to the containing namespace.
    func_var = VariableDoc(name=func_name, value=func_doc,
                           is_imported=False, is_alias=False,
                           docs_extracted_by='parser')
    set_variable(namespace, func_var)

    # Return the new ValueDoc.
    return func_doc
| |
def apply_decorator(decorator_name, func_doc):
    """
    Return the documentation object that results from applying the
    decorator named C{decorator_name} to C{func_doc}.

    C{staticmethod} and C{classmethod} produce a C{StaticMethodDoc}
    or C{ClassMethodDoc} built from C{func_doc}'s attributes.  Any
    other decorator is handled according to
    L{DEFAULT_DECORATOR_BEHAVIOR}: C{'transparent'} yields a copy of
    C{func_doc}, and C{'opaque'} yields an empty C{GenericValueDoc}.

    @raise ValueError: If the decorator is not a known one, and
        L{DEFAULT_DECORATOR_BEHAVIOR} has an unsupported value.
    """
    # [xx] what if func_doc is not a RoutineDoc?
    if decorator_name == DottedName('staticmethod'):
        return StaticMethodDoc(**func_doc.__dict__)
    if decorator_name == DottedName('classmethod'):
        return ClassMethodDoc(**func_doc.__dict__)

    # Not one of the standard decorators: fall back on the
    # configured default.
    if DEFAULT_DECORATOR_BEHAVIOR == 'transparent':
        same_class = func_doc.__class__
        return same_class(**func_doc.__dict__) # make a copy.
    if DEFAULT_DECORATOR_BEHAVIOR == 'opaque':
        return GenericValueDoc(docs_extracted_by='parser')
    raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR')
| |
def init_arglist(func_doc, arglist):
    """
    Initialize the signature attributes of C{func_doc} (C{posargs},
    C{posarg_defaults}, C{vararg} and C{kwarg}) from the token tree
    of a function definition's parenthesized argument list.

    Each positional argument without a default gets C{None} in
    C{posarg_defaults}; each one with a default gets a
    C{GenericValueDoc} whose repr is the default's source text.

    @raise ParseError: If C{arglist} is not a parenthesized group, if
        C{*} or C{**} is not followed by a single name, or if an
        argument is followed by anything other than C{=} and a
        default value.
    """
    if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
        raise ParseError("Bad argument list")

    # Initialize to defaults.
    func_doc.posargs = []
    func_doc.posarg_defaults = []
    func_doc.vararg = None
    func_doc.kwarg = None

    # Divide the arglist (without its parens) into individual args.
    args = split_on(arglist[1:-1], (token.OP, ','))

    def pop_starred(marker, error_message):
        # If the last remaining argument starts with the given
        # operator, then remove it from `args` and return the name
        # that follows the operator.  Otherwise, return None.
        if not args or args[-1][0] != (token.OP, marker):
            return None
        starred = args[-1]
        if len(starred) != 2 or starred[1][0] != token.NAME:
            raise ParseError(error_message)
        args.pop()
        return starred[1][1]

    # Keyword argument (always last).
    kwarg_name = pop_starred('**',
                             "Expected name after ** in argument list")
    if kwarg_name is not None:
        func_doc.kwarg = kwarg_name

    # Vararg argument (last, once the keyword argument is gone).
    vararg_name = pop_starred('*',
                              "Expected name after * in argument list")
    if vararg_name is not None:
        func_doc.vararg = vararg_name

    # Positional arguments.
    for arg in args:
        func_doc.posargs.append(parse_funcdef_arg(arg[0]))
        if len(arg) == 1:
            # No default value.
            func_doc.posarg_defaults.append(None)
            continue
        if arg[1] != (token.OP, '=') or len(arg) == 2:
            raise ParseError("Bad argument list")
        default_val = GenericValueDoc(
            parse_repr=pp_toktree(arg[2:], 'tight'),
            docs_extracted_by='parser')
        func_doc.posarg_defaults.append(default_val)
| |
| #///////////////////////////////////////////////////////////////// |
| # Line handler: class declarations |
| #///////////////////////////////////////////////////////////////// |
| |
def process_classdef(line, parent_docs, prev_line_doc, lineno,
                     comments, decorators, encoding):
    """
    The line handler for class declaration lines, such as:

        >>> class Foo(Bar, Baz):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{ClassDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{ClassDoc}.  The new class
    is also registered as a subclass of each of its bases that is a
    C{ClassDoc}.  If the base list can not be parsed, then a warning
    is logged and the class's C{bases} are set to C{UNKNOWN}.  If the
    innermost context is not a C{NamespaceDoc}, then the definition
    is ignored.

    @return: The new C{ClassDoc}; or C{None} if the definition was
        ignored.
    @raise ParseError: If the line does not end with a colon, has
        the wrong number of elements, or has something other than a
        parenthesized group where the base list is expected.
    """
    # Check syntax
    if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'):
        raise ParseError("Bad class definition line")

    # If we're not in a namespace, then ignore it.
    parent_doc = parent_docs[-1]
    if not isinstance(parent_doc, NamespaceDoc): return

    # Get the class's name
    class_name = parse_name(line[1])
    canonical_name = DottedName(parent_doc.canonical_name, class_name)

    # Create the class's ClassDoc & VariableDoc.
    class_doc = ClassDoc(variables={}, sort_spec=[],
                         bases=[], subclasses=[],
                         canonical_name=canonical_name,
                         defining_module=parent_docs[0],
                         docs_extracted_by='parser')
    var_doc = VariableDoc(name=class_name, value=class_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')

    # Add the bases.
    if len(line) == 4:
        if (not isinstance(line[2], list) or
            line[2][0] != (token.OP, '(')):
            raise ParseError("Expected base list")
        try:
            for base_name in parse_classdef_bases(line[2]):
                class_doc.bases.append(find_base(base_name, parent_docs))
        except ParseError:
            # sys.exc_info() is used to get at the exception so that
            # this code does not depend on either spelling of the
            # "except ... as/," syntax.
            e = sys.exc_info()[1]
            log.warning("Unable to extract the base list for %s: %s" %
                        (canonical_name, e))
            class_doc.bases = UNKNOWN
    else:
        class_doc.bases = []

    # Register ourselves as a subclass to our bases.
    if class_doc.bases is not UNKNOWN:
        for basedoc in class_doc.bases:
            if isinstance(basedoc, ClassDoc):
                # This test avoids that a subclass gets listed twice when
                # both introspection and parsing.
                # [XXX] This check only works because currently parsing is
                # always performed just after introspection of the same
                # class. A more complete fix should be independent from
                # calling order; probably the subclasses list should be
                # replaced by a ClassDoc set or a {name: ClassDoc} mapping.
                #
                # An empty list can not contain a duplicate, so the
                # class must always be added to it; requiring the
                # list to be non-empty would mean that the first
                # subclass of a base is never registered.
                if (not basedoc.subclasses
                    or basedoc.subclasses[-1].canonical_name
                    != class_doc.canonical_name):
                    basedoc.subclasses.append(class_doc)

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(class_doc, comments)

    # Add the VariableDoc to our container.
    set_variable(parent_doc, var_doc)

    return class_doc
| |
def _proxy_base(**attribs):
    """
    Create an empty C{ClassDoc} (no variables, bases or subclasses)
    to stand in for a base class whose own docs are not available.

    @param attribs: Additional attributes for the new C{ClassDoc}.
    """
    proxy = ClassDoc(variables={}, sort_spec=[], bases=[], subclasses=[],
                     docs_extracted_by='parser', **attribs)
    return proxy
| |
def find_base(name, parent_docs):
    """
    Find the documentation for the base class called C{name} in a
    class definition.

    @type name: L{DottedName}
    @return: The value of the variable that C{name} refers to, if it
        is known.  Otherwise, if the variable was imported and
        L{BASE_HANDLING} is C{'parse'}, the result of parsing the
        imported object's docs.  Otherwise, an empty proxy
        C{ClassDoc} (see L{_proxy_base}).
    """
    assert isinstance(name, DottedName)

    # Find the variable containing the base.
    base_var = lookup_variable(name, parent_docs)

    # If we didn't find it, then it must have been imported.
    # First, check if it looks like it's contained in any
    # known imported variable:
    if base_var is None and len(name) > 1:
        head_var = lookup_name(name[0], parent_docs)
        if (head_var is not None and
            head_var.imported_from not in (None, UNKNOWN)):
            base_src = DottedName(head_var.imported_from, name[1:])
            base_var = VariableDoc(name=name[-1], is_imported=True,
                                   is_alias=False, imported_from=base_src,
                                   docs_extracted_by='parser')

    # Otherwise, it must have come from an "import *" statement
    # (or from magic, such as direct manipulation of the module's
    # dictionary), so we don't know where it came from.  So
    # there's nothing left but to use an empty proxy.
    if base_var is None:
        return _proxy_base(parse_repr=str(name))

    # If the variable has a value, return that value.
    if base_var.value is not UNKNOWN:
        return base_var.value

    # Without a value or an import source, all that we know about
    # the base is its name.
    origin = base_var.imported_from
    if origin in (None, UNKNOWN):
        return _proxy_base(parse_repr=str(name))

    # If BASE_HANDLING is 'parse', try parsing the docs for the base
    # class, with the directory of the module being parsed
    # temporarily placed at the front of sys.path.
    if BASE_HANDLING == 'parse':
        saved_sys_path = sys.path
        try:
            module_dir = os.path.split(parent_docs[0].filename)[0]
            sys.path = [module_dir] + sys.path
            try:
                return parse_docs(name=str(origin))
            except ParseError:
                log.info('Unable to parse base', origin)
            except ImportError:
                log.info('Unable to find base', origin)
        finally:
            sys.path = saved_sys_path

    # Either BASE_HANDLING='link' or parsing the base class failed;
    # return a proxy value for the base class.
    return _proxy_base(proxy_for=origin)
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Parsing |
| #///////////////////////////////////////////////////////////////// |
| |
def dotted_names_in(elt_list):
    """
    Return a list of all simple dotted names in the given
    expression.  Bracketed groups are searched recursively; elements
    that are not simple dotted names are ignored.

    Note that C{elt_list} is used as the work list, so it is empty
    when this function returns.

    @param elt_list: A list of token tree element lists.
    @rtype: C{list} of L{DottedName}
    """
    found = []
    while elt_list:
        elt = elt_list.pop()

        if len(elt) == 1 and isinstance(elt[0], list):
            # Nested list: strip its brackets, and queue up each of
            # its comma-separated members.
            members = split_on(elt[0][1:-1], (token.OP, ','))
            elt_list.extend(members)
            continue

        try:
            dotted = parse_dotted_name(elt)
        except ParseError:
            continue # complex expression -- ignore
        found.append(dotted)
    return found
| |
def parse_name(elt, strip_parens=False):
    """
    If the given token tree element is a name token, then return
    that name as a string.  Otherwise, raise ParseError.

    @param strip_parens: If true, then any number of parentheses
        enclosing a single element are removed from C{elt} before it
        is examined.
    @raise ParseError: If C{elt} is not a name token.
    """
    if strip_parens:
        # Peel off one layer of "( x )" at a time.
        while (isinstance(elt, list) and len(elt) == 3 and
               elt[0] == (token.OP, '(') and
               elt[-1] == (token.OP, ')')):
            elt = elt[1]

    if isinstance(elt, list) or elt[0] != token.NAME:
        raise ParseError("Bad name")
    return elt[1]
| |
def parse_dotted_name(elt_list, strip_parens=True, parent_name=None):
    """
    Convert a list of token tree elements of the form
    C{M{name} . M{name} . M{...}} into a L{DottedName}.  Parentheses
    around the first element are removed first.  C{elt_list} itself
    is never modified.

    @param elt_list: The token tree elements to convert.
    @param strip_parens: Not used by this function.
    @param parent_name: canonical name of referring module, to resolve
        relative imports.  If it is given and C{elt_list} starts
        with one or more C{'.'} tokens, then that many trailing
        identifiers are removed from C{parent_name}, and what is left
        is used as a prefix for the result.
    @type parent_name: L{DottedName}
    @rtype: L{DottedName}
    @raise ParseError: If C{elt_list} is empty or is not a dotted
        name; or if a relative import that names no module (such as
        C{from . import foo}) has nothing left to use as its prefix.
    @bug: does not handle 'x.(y).z'
    """
    if len(elt_list) == 0: raise ParseError("Bad dotted name")

    # Handle ((x.y).z).  (If the contents of the parens include
    # anything other than dotted names, such as (x,y), then we'll
    # catch it below and raise a ParseError.)
    #
    # A new list is built for each layer that is removed, rather
    # than splicing into `elt_list`: the caller's token tree must be
    # left intact, since callers go on to use it (e.g. to generate
    # the source repr of a value) when we raise a ParseError.
    while (isinstance(elt_list[0], list) and
           len(elt_list[0]) >= 3 and
           elt_list[0][0] == (token.OP, '(') and
           elt_list[0][-1] == (token.OP, ')')):
        elt_list = list(elt_list[0][1:-1]) + list(elt_list[1:])

    # Convert a relative import into an absolute name.
    prefix_name = None
    if parent_name is not None and elt_list[0][-1] == '.':
        # Count the leading dots.
        items = 1
        while len(elt_list) > items and elt_list[items][-1] == '.':
            items += 1

        elt_list = elt_list[items:]
        prefix_name = parent_name[:-items]

        # >>> from . import foo
        if not elt_list:
            if prefix_name == []:
                raise ParseError("Attempted relative import in non-package, "
                                 "or beyond toplevel package")
            return prefix_name

    # Names and dots must alternate, starting and ending with a name.
    if len(elt_list) % 2 != 1: raise ParseError("Bad dotted name")
    name = DottedName(parse_name(elt_list[0], True))
    if prefix_name is not None:
        name = prefix_name + name

    for i in range(2, len(elt_list), 2):
        dot, identifier = elt_list[i-1], elt_list[i]
        if dot != (token.OP, '.'):
            raise ParseError("Bad dotted name")
        name = DottedName(name, parse_name(identifier, True))
    return name
| |
def split_on(elt_list, split_tok):
    """
    Split a list of token tree elements into sub-lists, using
    C{split_tok} as the separator.

    @param elt_list: The elements to split.
    @param split_tok: The separator element; it is not included in
        the result.
    @return: A list of non-empty lists.  A single trailing separator
        is tolerated and does not produce an empty final group.
    @raise ParseError: If a separator appears at the start of the
        list, or directly after another separator.
    """
    groups = []
    current = []
    for elt in elt_list:
        if elt != split_tok:
            current.append(elt)
            continue
        # Separator: the group it terminates must have some content.
        if not current:
            raise ParseError("Empty element from split")
        groups.append(current)
        current = []
    if current:
        groups.append(current)
    return groups
| |
def parse_funcdef_arg(elt):
    """
    Convert one function definition argument from token tree form
    into plain strings.

    @param elt: Either a name token, or a parenthesized group holding
        a (possibly nested) comma separated tuple of arguments.
    @return: The identifier as a string; or, for a tuple argument, a
        (possibly nested) list of identifier strings.  A group that
        wraps a single element, C{(x)}, is treated as that element.
    @raise ParseError: If C{elt} is neither a name token nor a
        parenthesized group.
    """
    if not isinstance(elt, list):
        if elt[0] == token.NAME:
            return elt[1]
        raise ParseError("Bad argument -- expected name or tuple")

    if elt[0] != (token.OP, '('):
        raise ParseError("Bad argument -- expected name or tuple")

    # Three elements means '(' <something> ')': unwrap it.
    if len(elt) == 3:
        return parse_funcdef_arg(elt[1])

    comma = (token.OP, ',')
    return [parse_funcdef_arg(member)
            for member in elt[1:-1]
            if member != comma]
| |
def parse_classdef_bases(elt):
    """
    Convert the base list of a class definition into a list of
    L{DottedName}s.

    @param elt: The parenthesized group that follows the class name.
    @return: One L{DottedName} per comma separated base.
    @raise ParseError: If C{elt} is not a parenthesized group, or if
        any base is something other than a dotted name.

    @bug: Does not handle either of::
        - class A( (base.in.parens) ): pass
        - class B( (lambda:calculated.base)() ): pass
    """
    if isinstance(elt, list) and elt[0] == (token.OP, '('):
        # Drop the enclosing parentheses, then split on commas.
        base_groups = split_on(elt[1:-1], (token.OP, ','))
        return [parse_dotted_name(group) for group in base_groups]

    raise ParseError("Bad base list")
| |
| # Used by: base list; 'del'; ... |
def parse_dotted_name_list(elt_list):
    """
    Parse a comma separated list of dotted names.

    @param elt_list: A flat list of token tree elements.
    @return: A list of L{DottedName} objects, one per listed name.
        A trailing comma is accepted.
    @raise ParseError: If the elements do not form a comma separated
        list of dotted names.
    """
    # Parser states.
    want_name = 0        # expecting a name, or end of list
    want_separator = 1   # expecting comma, period, or end of list
    want_attribute = 2   # expecting the name that follows a period

    names = []
    state = want_name
    for elt in elt_list:
        if state == want_separator:
            if elt == (token.OP, '.'):
                state = want_attribute
            elif elt == (token.OP, ','):
                state = want_name
            else:
                raise ParseError("Expected '.' or ',' or end of list")
            continue

        # Both remaining states require a name token.
        if not (isinstance(elt, tuple) and elt[0] == token.NAME):
            raise ParseError("Expected a name")
        if state == want_name:
            names.append(DottedName(elt[1]))
        else:
            # Extend the name that is currently being built.
            names[-1] = DottedName(names[-1], elt[1])
        state = want_separator

    # The list may not end on a dangling period.
    if state == want_attribute:
        raise ParseError("Expected a name")
    return names
| |
def parse_string(elt_list):
    """
    Return the value of a string literal.

    The literal is decoded with C{ast.literal_eval} rather than
    C{eval}, so text taken from the module being documented is never
    executed.  Every plain string literal form (raw strings, triple
    quoted strings, prefixed strings, ...) is still handled.

    @param elt_list: A list that should contain exactly one string
        token.
    @return: The value denoted by the string literal.
    @raise ParseError: If C{elt_list} is not a single string token, or
        if the token's text is not a plain literal.
    @note: C{ast.literal_eval} requires Python 2.6 or later.
    """
    import ast

    if len(elt_list) == 1 and elt_list[0][0] == token.STRING:
        try:
            return ast.literal_eval(elt_list[0][1])
        except (ValueError, SyntaxError):
            # Tokenized as a string, but not a constant literal.
            raise ParseError("Expected a string")
    else:
        raise ParseError("Expected a string")
| |
| # ['1', 'b', 'c'] |
def parse_string_list(elt_list):
    """
    Parse a comma separated sequence of string literals, optionally
    enclosed in parentheses or square brackets.

    @param elt_list: The token tree elements to parse.  This is either
        a one-element list holding a bracketed group, or a flat list
        of string tokens and commas.
    @return: A list with the value of each string literal.
    @raise ParseError: If any item is not a single string literal.
    """
    # Unwrap a surrounding (...) or [...] group.  The isinstance test
    # must look at the element, not at elt_list itself: otherwise a
    # bare string token gets indexed as though it were a group, which
    # fails with a TypeError instead of being parsed.
    if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
        len(elt_list[0]) >= 2 and
        elt_list[0][0][1] in ('(', '[')):
        elt_list = elt_list[0][1:-1]

    return [parse_string(string_elt)
            for string_elt in split_on(elt_list, (token.OP, ','))]
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Variable Manipulation |
| #///////////////////////////////////////////////////////////////// |
| |
def set_variable(namespace, var_doc, preserve_docstring=False):
    """
    Register C{var_doc} in C{namespace}, replacing any variable that
    already has the same name.

    Nothing happens if C{namespace} is not a L{NamespaceDoc}.

    @param namespace: The namespace that should receive the variable.
    @param var_doc: The variable to add; its C{container} must still
        be C{UNKNOWN}.
    @param preserve_docstring: If true, and C{var_doc} has no
        docstring of its own, then the docstring (and docstring line
        number) of the variable being replaced is carried over.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    # This happens when the class definition has not been parsed, e.g. in
    # sf bug #1693253 on ``Exception.x = y``
    if namespace.sort_spec is UNKNOWN:
        namespace.sort_spec = namespace.variables.keys()

    name = var_doc.name
    variables = namespace.variables

    if name in variables:
        # Forget the position of the variable that is being replaced.
        namespace.sort_spec.remove(name)
        replaced = variables[name]

        # The old value no longer lives under this name, so it loses
        # the canonical name it was given.
        if (replaced.is_alias == False and
            replaced.value is not UNKNOWN):
            replaced.value.canonical_name = UNKNOWN

        if (preserve_docstring and
            var_doc.docstring in (None, UNKNOWN) and
            replaced.docstring not in (None, UNKNOWN)):
            var_doc.docstring = replaced.docstring
            var_doc.docstring_lineno = replaced.docstring_lineno

    # Add the variable to the namespace.
    variables[name] = var_doc
    namespace.sort_spec.append(name)
    assert var_doc.container is UNKNOWN
    var_doc.container = namespace
| |
def del_variable(namespace, name):
    """
    Remove the variable identified by C{name} from C{namespace}.

    Nothing happens if C{namespace} is not a L{NamespaceDoc}, or if
    the first piece of C{name} is not one of its variables.

    @param namespace: The namespace to delete from.
    @param name: The name to delete, as a sequence of identifiers.
        When it has more than one piece, the deletion is applied to
        the value of the variable named by the first piece, using the
        remaining pieces.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    if name[0] not in namespace.variables:
        return

    if len(name) > 1:
        # Descend into the value of the first piece.
        del_variable(namespace.variables[name[0]].value, name[1:])
        return

    # As in set_variable(): a namespace whose definition was not
    # parsed still has sort_spec set to UNKNOWN, which has no
    # remove() method.  Give it a real list first.
    if namespace.sort_spec is UNKNOWN:
        namespace.sort_spec = list(namespace.variables.keys())

    var_doc = namespace.variables[name[0]]
    namespace.sort_spec.remove(name[0])
    del namespace.variables[name[0]]
    # The value is no longer reachable under this name.
    if not var_doc.is_alias and var_doc.value is not UNKNOWN:
        var_doc.value.canonical_name = UNKNOWN
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Name Lookup |
| #///////////////////////////////////////////////////////////////// |
| |
def lookup_name(identifier, parent_docs):
    """
    Resolve a simple identifier to the documentation of the variable
    that it names.

    @param identifier: The name to look up; must be a string.
    @param parent_docs: The stack of enclosing docs.  Its last element
        is treated as the local namespace and its first element as the
        global namespace (the containing module).
    @rtype: L{VariableDoc} or C{None}
    @return: The variable's documentation, or C{None} if the name is
        not found in the locals, the globals, or the builtins.
    @raise TypeError: If C{identifier} is not a string.
    """
    if not isinstance(identifier, basestring):
        raise TypeError('identifier must be a string')

    # We need to check 3 namespaces: locals, globals, and builtins.
    # Note that this is true even if we're in a version of python with
    # nested scopes, because nested scope lookup does not apply to
    # nested class definitions, and we're not worried about variables
    # in nested functions.
    for scope in (parent_docs[-1], parent_docs[0]):
        if isinstance(scope, NamespaceDoc) and identifier in scope.variables:
            return scope.variables[identifier]

    # Builtins are only consulted once locals and globals have failed.
    builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
    if isinstance(builtins, NamespaceDoc) and identifier in builtins.variables:
        return builtins.variables[identifier]

    # We didn't find it; return None.
    return None
| |
def lookup_variable(dotted_name, parent_docs):
    """
    Find the documentation for the variable with the given dotted
    name.

    @param dotted_name: The name of the variable.
    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing docs, as used by
        L{lookup_name}.
    @return: The variable's documentation, or C{None} if it can not
        be found.
    """
    assert isinstance(dotted_name, DottedName)

    # A simple identifier is resolved directly.
    if len(dotted_name) == 1:
        return lookup_name(dotted_name[0], parent_docs)

    # Otherwise resolve the namespace that should contain the
    # variable, and look the last piece up inside of it.
    container = lookup_value(dotted_name[:-1], parent_docs)
    if (isinstance(container, NamespaceDoc) and
        dotted_name[-1] in container.variables):
        return container.variables[dotted_name[-1]]

    return None # var not found.
| |
def lookup_value(dotted_name, parent_docs):
    """
    Find the documentation for the value held by the variable with
    the given dotted name.

    @param dotted_name: The name of the variable.
    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing docs, as used by
        L{lookup_name}.
    @return: The value's documentation; or C{None} if the name can
        not be resolved.  If resolution reaches a variable whose
        value is unknown but that was imported from somewhere, then a
        L{GenericValueDoc} proxy for the rest of the name is returned
        instead.
    """
    assert isinstance(dotted_name, DottedName)

    # Resolve the first piece, then walk down the remaining pieces.
    var_doc = lookup_name(dotted_name[0], parent_docs)

    for i in range(1, len(dotted_name)):
        if var_doc is None:
            return None

        value = var_doc.value
        if isinstance(value, NamespaceDoc):
            var_doc = value.variables.get(dotted_name[i])
            continue

        if (value is UNKNOWN and
            var_doc.imported_from not in (None, UNKNOWN)):
            # The trail leaves the parsed code through an import.
            src_name = var_doc.imported_from + dotted_name[i:]
            # [xx] do I want to create a proxy here??
            return GenericValueDoc(proxy_for=src_name,
                                   parse_repr=str(dotted_name),
                                   docs_extracted_by='parser')

        return None

    if var_doc is None:
        return None
    return var_doc.value
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Docstring Comments |
| #///////////////////////////////////////////////////////////////// |
| |
def add_docstring_from_comments(api_doc, comments):
    """
    Use a block of comment lines as the docstring of C{api_doc}.

    Nothing happens if C{api_doc} is C{None} or C{comments} is empty.

    @param api_doc: The object whose C{docstring} and
        C{docstring_lineno} attributes should be set.
    @param comments: A list of C{(line, lineno)} pairs.  The lines are
        joined with newlines, and the line number of the first pair is
        recorded as the docstring's line number.
    """
    if api_doc is not None and comments:
        text_lines = [text for (text, _lineno) in comments]
        api_doc.docstring = '\n'.join(text_lines)
        api_doc.docstring_lineno = comments[0][1]
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Tree tokens |
| #///////////////////////////////////////////////////////////////// |
| |
| def _join_toktree(s1, s2): |
| # Join them. s1 = left side; s2 = right side. |
| if (s2=='' or s1=='' or |
| s1 in ('-','`') or s2 in ('}',']',')','`',':') or |
| s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or |
| (s2[0] == '(' and s1[-1] not in (',','='))): |
| return '%s%s' % (s1,s2) |
| elif (spacing=='tight' and |
| s1[-1] in '+-*/=,' or s2[0] in '+-*/=,'): |
| return '%s%s' % (s1, s2) |
| else: |
| return '%s %s' % (s1, s2) |
| |
| def _pp_toktree_add_piece(spacing, pieces, piece): |
| s1 = pieces[-1] |
| s2 = piece |
| |
| if (s2=='' or s1=='' or |
| s1 in ('-','`') or s2 in ('}',']',')','`',':') or |
| s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or |
| (s2[0] == '(' and s1[-1] not in (',','='))): |
| pass |
| elif (spacing=='tight' and |
| s1[-1] in '+-*/=,' or s2[0] in '+-*/=,'): |
| pass |
| else: |
| pieces.append(' ') |
| |
| pieces.append(piece) |
| |
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Pretty-print a token tree as source text.

    @param elts: The token tree elements to render.
    @param spacing: The spacing mode (C{'normal'} or C{'tight'}) used
        when pieces are joined.
    @param indent: The indentation level to start from.
    @return: The rendered text, as a single string.
    """
    # Start from one empty piece, so that there is always a previous
    # piece to compare against.
    fragments = ['']
    _pp_toktree(elts, spacing, indent, fragments)
    return ''.join(fragments)
| |
def _pp_toktree(elts, spacing, indent, pieces):
    """
    Render the given token tree elements, appending the generated
    text to C{pieces}.  Nested lists are rendered recursively.

    @param elts: The token tree elements to render.
    @param spacing: The spacing mode, passed on to
        L{_pp_toktree_add_piece}.
    @param indent: The current indentation level.  It is adjusted
        locally when INDENT and DEDENT tokens are seen.
    @param pieces: The list of output pieces; it is modified in place.
    """
    add_piece = _pp_toktree_add_piece

    for elt in elts:
        # Put a blank line before class & def statements.
        if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
            add_piece(spacing, pieces, '\n%s' % (' '*indent))

        if isinstance(elt, tuple):
            if elt[0] == token.NEWLINE:
                add_piece(spacing, pieces, ' '+elt[1])
                add_piece(spacing, pieces, '\n%s' % (' '*indent))
            elif elt[0] == token.INDENT:
                add_piece(spacing, pieces, ' ')
                indent += 1
            elif elt[0] == token.DEDENT:
                # Take back the indentation that was just emitted.
                assert pieces[-1] == ' '
                pieces.pop()
                indent -= 1
            elif elt[0] == tokenize.COMMENT:
                add_piece(spacing, pieces, elt[1].rstrip() + '\n')
                # add_piece() takes (spacing, pieces, piece); passing
                # only the text raised a TypeError on every comment.
                add_piece(spacing, pieces, ' '*indent)
            else:
                add_piece(spacing, pieces, elt[1])
        else:
            _pp_toktree(elt, spacing, indent, pieces)
| |
| #///////////////////////////////////////////////////////////////// |
| #{ Helper Functions |
| #///////////////////////////////////////////////////////////////// |
| |
def get_module_encoding(filename):
    """
    Determine the source encoding of a Python module from its first
    two lines.

    The file is read as bytes.  A UTF-8 byte order mark selects
    C{'utf-8'}.  Otherwise an encoding declaration is looked for, and
    it is only recognized inside a comment, so that ordinary code such
    as C{f(encoding=x)} is not mistaken for one.

    @param filename: The path of the module's source file.
    @return: The name of the encoding, as a string; C{'iso-8859-1'}
        if the file does not declare one.
    @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
    """
    import codecs

    module_file = open(filename, 'rb')
    try:
        head = module_file.readline() + module_file.readline()
    finally:
        module_file.close()

    # splitlines() recognizes \n, \r\n and \r line endings, which
    # keeps the universal-newline behavior of the old 'rU' mode.
    lines = head.splitlines()[:2]

    if lines and lines[0].startswith(codecs.BOM_UTF8):
        return 'utf-8'

    # The pattern from PEP 263, encoded so that it can be matched
    # against the raw bytes of the file.
    cookie_re = re.compile(
        r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)'.encode('ascii'))
    for line in lines:
        m = cookie_re.search(line)
        if m: return str(m.group(1).decode('ascii'))

    # Fall back on Python's default encoding.
    return 'iso-8859-1' # aka 'latin-1'
| |
def _get_module_name(filename, package_doc):
    """
    Work out the canonical name of the module stored in C{filename}.

    @param filename: The path of the module's source file.  For a
        package's C{__init__} file, the name of the containing
        directory is used as the module name.
    @param package_doc: The documentation of the containing package,
        or C{None} for a top-level module.
    @return: (dotted_name, is_package)
    """
    # Strip a '.py' / '.pyc' / '.pyw' style extension.  The dot is
    # escaped so that only a real extension is removed: an unescaped
    # '.' also matched names that merely end in 'py', cutting an
    # extensionless script such as 'mypy' down to 'm'.
    name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
    if name == '__init__':
        is_package = True
        name = os.path.split(os.path.split(filename)[0])[1]
    else:
        is_package = False

    # [XX] if the module contains a script, then `name` may not
    # necessarily be a valid identifier -- which will cause
    # DottedName to raise an exception.  Is that what I want?
    if package_doc is None:
        dotted_name = DottedName(name)
    else:
        dotted_name = DottedName(package_doc.canonical_name, name)

    # Check if the module looks like it's shadowed by a variable.
    # If so, then add a "'" to the end of its canonical name, to
    # distinguish it from the variable.
    if package_doc is not None and name in package_doc.variables:
        vardoc = package_doc.variables[name]
        if (vardoc.value not in (None, UNKNOWN) and
            vardoc.imported_from != dotted_name):
            log.warning("Module %s might be shadowed by a variable with "
                        "the same name." % dotted_name)
            dotted_name = DottedName(str(dotted_name)+"'")

    return dotted_name, is_package
| |
def flatten(lst, out=None):
    """
    Collect the leaves of a nested list, in order.

    @param lst: The nested list that should be flattened.  Both lists
        and tuples found inside of it are descended into.
    @param out: An optional list to append the leaves to; if omitted,
        a new list is created.
    @return: a flat list containing the leaves of the given nested
        list (C{out} itself, when it was supplied).
    """
    if out is None:
        leaves = []
    else:
        leaves = out

    for item in lst:
        if not isinstance(item, (list, tuple)):
            leaves.append(item)
        else:
            flatten(item, leaves)
    return leaves
| |