| # epydoc -- Utility functions |
| # |
| # Copyright (C) 2005 Edward Loper |
| # Author: Edward Loper <[email protected]> |
| # URL: <http://epydoc.sf.net> |
| # |
| # $Id: util.py 1671 2008-01-29 02:55:49Z edloper $ |
| |
| """ |
| Miscellaneous utility functions that are used by multiple modules. |
| |
| @group Python source types: is_module_file, is_package_dir, is_pyname, |
| py_src_filename |
| @group Text processing: wordwrap, decode_with_backslashreplace, |
| plaintext_to_html |
| """ |
| __docformat__ = 'epytext en' |
| |
| import os, os.path, re |
| |
| ###################################################################### |
| ## Python Source Types |
| ###################################################################### |
| |
# Filename extensions that identify Python source files (see
# is_src_filename and py_src_filename).
PY_SRC_EXTENSIONS = ['.py', '.pyw']
# Additional extensions that is_module_file accepts for modules that
# are not plain source files.
PY_BIN_EXTENSIONS = ['.pyc', '.so', '.pyd']
| |
def is_module_file(path):
    """
    Return true if the given path names an existing file that could
    be a Python module: its base name (without extension) must be a
    valid identifier, and its extension must be listed in
    C{PY_SRC_EXTENSIONS} or C{PY_BIN_EXTENSIONS}.

    @param path: The candidate file name.  Values that are not
        strings are rejected rather than raising an exception.
    @rtype: C{bool}
    """
    # Make sure it's a file name.
    if not isinstance(path, basestring):
        return False
    filename = os.path.split(path)[1]
    (basename, extension) = os.path.splitext(filename)
    # The pattern is a raw string so that "\w" reaches the regexp
    # engine verbatim instead of relying on an unrecognized string
    # escape.  bool() keeps the result from leaking None to callers.
    return bool(os.path.isfile(path) and
                re.match(r'[a-zA-Z_]\w*$', basename) and
                extension in PY_SRC_EXTENSIONS+PY_BIN_EXTENSIONS)
| |
def is_src_filename(filename):
    """
    Return true if C{filename} is a string naming an existing path
    whose extension is one of C{PY_SRC_EXTENSIONS}.

    @param filename: The candidate file name.
    @rtype: C{bool}
    """
    # The type test comes first, so os.path.exists is only ever
    # handed a string.
    if not (isinstance(filename, basestring) and
            os.path.exists(filename)):
        return False
    extension = os.path.splitext(filename)[1]
    return extension in PY_SRC_EXTENSIONS
| |
def is_package_dir(dirname):
    """
    Return true if the given directory is a valid package directory
    (i.e., it names a directory that contains a valid __init__ file).

    The directory's own name is deliberately I{not} required to be a
    valid identifier: that constraint was removed because of
    sourceforge bug #1787028 -- in some cases (eg eggs), it's too
    strict.

    @param dirname: The candidate directory name.  Values that are
        not strings are rejected rather than raising an exception.
    @rtype: C{bool}
    """
    # Make sure it's a directory name.
    if not isinstance(dirname, basestring):
        return False
    if not os.path.isdir(dirname):
        return False
    dirname = os.path.abspath(dirname)

    # Look for any "__init__.<ext>" entry that is itself a module file.
    for name in os.listdir(dirname):
        filename = os.path.join(dirname, name)
        if name.startswith('__init__.') and is_module_file(filename):
            return True
    return False
| |
def is_pyname(name):
    """
    Test whether C{name} looks like a dotted name: one or more runs
    of word characters, separated by single periods.

    Note that the pattern only checks for word characters, so a
    component that begins with a digit is accepted as well.

    @param name: The string to test.
    @return: A true value (the regexp match object) if C{name}
        matches, and C{None} otherwise.
    """
    dotted_name = re.compile(r"\w+(\.\w+)*$")
    return dotted_name.match(name)
| |
def py_src_filename(filename):
    """
    Return the name of the Python source file that corresponds to
    C{filename}.

    If C{filename} already has one of the extensions in
    C{PY_SRC_EXTENSIONS}, it is returned unchanged (without checking
    that it exists).  Otherwise, each source extension is tried in
    turn in place of the original extension, and the first candidate
    that names an existing file is returned.

    @param filename: A file name, typically that of a compiled module.
    @rtype: C{string}
    @raise ValueError: If no corresponding source file exists.
    """
    root, extension = os.path.splitext(filename)
    if extension in PY_SRC_EXTENSIONS:
        return filename

    for src_extension in PY_SRC_EXTENSIONS:
        candidate = '%s%s' % (root, src_extension)
        if os.path.isfile(candidate):
            return candidate

    raise ValueError('Could not find a corresponding '
                     'Python source file for %r.' % filename)
| |
def munge_script_name(filename):
    """
    Build an identifier-like name for a script from its file name.

    The directory part of C{filename} is discarded, every character
    of the remaining base name that is not a word character is
    replaced by an underscore, and the result is prefixed with
    C{'script-'}.

    @param filename: The path of the script.
    @rtype: C{string}
    """
    basename = os.path.basename(filename)
    return 'script-' + re.sub(r'\W', '_', basename)
| |
| ###################################################################### |
| ## Text Processing |
| ###################################################################### |
| |
def decode_with_backslashreplace(s):
    r"""
    Convert the given 8-bit string into unicode, treating any
    character c such that ord(c)<128 as an ascii character, and
    converting any c such that ord(c)>=128 into a backslashed escape
    sequence.

        >>> decode_with_backslashreplace('abc\xff\xe8')
        u'abc\\xff\\xe8'

    @param s: The 8-bit string to convert (a C{str} under Python 2;
        a C{bytes} object under Python 3).
    @rtype: C{unicode}
    @raise TypeError: If C{s} is not an 8-bit string.
    """
    # The type of an encoded string is "str" on Python 2 and "bytes"
    # on Python 3; deriving it this way avoids naming either one.
    byte_string_type = type(''.encode('ascii'))
    # Validate explicitly: an assert would be stripped under -O.
    if not isinstance(s, byte_string_type):
        raise TypeError('expected an 8-bit string, got %s'
                        % type(s).__name__)
    # s.encode('string-escape') is not appropriate here, since it
    # also adds backslashes to some ascii chars (eg \ and ').
    return (s
            .decode('latin1')
            .encode('ascii', 'backslashreplace')
            .decode('ascii'))
| |
def wordwrap(str, indent=0, right=75, startindex=0, splitchars=''):
    """
    Word-wrap the given string.  The string is split into chunks
    (runs of spaces, newlines, and the text between them), and a line
    break is inserted before any chunk that would extend the current
    line past column C{right}.  A run of spaces that triggers a line
    break is dropped.  Newlines that are already present in the
    string are I{not} removed, and tabs are expanded before wrapping.
    The result always ends with exactly one newline, with any other
    trailing whitespace removed.

    @param indent: If specified, then indent each line by this number
        of spaces.
    @type indent: C{int}
    @param right: The right margin for word wrapping.
    @type right: C{int}
    @param startindex: If specified, then assume that the first line
        is already preceded by C{startindex} characters.
    @type startindex: C{int}
    @param splitchars: A list of non-whitespace characters which can
        be used to split a line.  (E.g., use '/\\' to allow path names
        to be split over multiple lines.)
    @rtype: C{str}
    """
    # Choose the chunking pattern.  The capturing group makes
    # re.split keep the separators as chunks of their own.
    if splitchars:
        escaped = re.escape(splitchars)
        splitter = r'( +|\n|[^ \n%s]*[%s])' % (escaped, escaped)
    else:
        splitter = r'( +|\n)'
    pieces = re.split(splitter, str.expandtabs())

    # Pad the first line only by whatever is not already covered by
    # the startindex characters that precede it.
    output = [' '*(indent-startindex)]
    column = max(indent, startindex)
    for piece in pieces:
        too_long = column+len(piece) > right and column > 0
        if too_long or piece == '\n':
            output.append('\n' + ' '*indent)
            column = indent
            # The whitespace that caused the break is not carried
            # over to the start of the new line.
            if piece[:1] in ('\n', ' '):
                continue
        output.append(piece)
        column += len(piece)
    return ''.join(output).rstrip()+'\n'
| |
def plaintext_to_html(s):
    """
    @return: An HTML string that encodes the given plaintext string.
        In particular, the special characters ampersand, double
        quote, less-than and greater-than are replaced by the
        corresponding character entity references (C{amp}, C{quot},
        C{lt} and C{gt}).
    @rtype: C{string}
    @param s: The plaintext string to encode.
    """
    # The ampersand must be handled first; otherwise the ampersands
    # that introduce the other entity references would themselves be
    # escaped a second time.
    for char, entity_name in (('&', 'amp'), ('"', 'quot'),
                              ('<', 'lt'), ('>', 'gt')):
        s = s.replace(char, '&%s;' % entity_name)
    return s
| |
def plaintext_to_latex(str, nbsp=0, breakany=0):
    """
    @return: A LaTeX string that encodes the given plaintext string.
        In particular, special characters (such as C{'$'} and C{'_'})
        are escaped, and tabs are expanded.
    @rtype: C{string}
    @param str: The plaintext string to encode.
    @param breakany: Insert hyphenation marks, so that LaTeX can
        break the resulting string at any point.  This is useful for
        small boxes (e.g., the type box in the variable list table).
    @param nbsp: Replace every space with a non-breaking space
        (C{'~'}).
    """
    # Expand tabs first, so that tab stops are computed from the
    # original text and every resulting space can receive its own
    # hyphenation point below.
    str = str.expandtabs()

    # Follow every character with a placeholder (chr(1)); these get
    # converted to hyphenation points later, once escaping is done.
    if breakany:
        str = re.sub('(.)', r'\1' + '\x01', str)

    # Replace backslashes with a placeholder (chr(0)); these get
    # converted to \textbackslash later, so that the backslashes
    # added by the escaping below are not confused with them.
    str = str.replace('\\', '\x00')

    # These elements need to be backslashed.
    str = re.sub(r'([#$&%_{}])', r'\\\1', str)

    # These elements have special names.  (The braces introduced
    # here come after the escaping step, so they stay unescaped.)
    str = str.replace('|', '{\\textbar}')
    str = str.replace('<', '{\\textless}')
    str = str.replace('>', '{\\textgreater}')
    str = str.replace('^', '{\\textasciicircum}')
    str = str.replace('~', '{\\textasciitilde}')
    str = str.replace('\x00', r'{\textbackslash}')

    # Replace spaces with non-breaking spaces.  This must follow the
    # tilde replacement above, or the new tildes would be escaped.
    if nbsp:
        str = str.replace(' ', '~')

    # Convert the chr(1) placeholders to hyphenation points.
    if breakany:
        str = str.replace('\x01', r'\-')

    return str
| |
class RunSubprocessError(OSError):
    """
    The error raised by L{run_subprocess} when the command it ran is
    considered to have failed.  The error message is the first
    element of the command followed by C{' failed'}.

    @ivar out: The output that was captured from the command's
        standard output.
    @ivar err: The output that was captured from the command's
        standard error.
    """
    def __init__(self, cmd, out, err):
        # The base class initializer is called directly, to match
        # how the exception has always been constructed.
        message = '%s failed' % cmd[0]
        OSError.__init__(self, message)
        self.out, self.err = out, err
| |
def run_subprocess(cmd, data=None):
    """
    Execute the command C{cmd} in a subprocess.

    Three implementations are tried in turn: the C{subprocess} module
    if it can be imported, then C{popen2.Popen3} if it exists, and
    finally C{os.popen3}.

    @param cmd: The command to execute, specified as a list
        of strings.  A single string is also accepted; it is split
        on whitespace (no quoting rules are applied).
    @param data: A string containing data to send to the
        subprocess.
    @return: A tuple C{(out, err)}.
    @raise OSError: If there is any problem executing the
        command, or if its exitval is not 0.
    @raise RunSubprocessError: (A subclass of C{OSError}.)  If the
        command's exit status is nonzero; or, on the code paths where
        no exit status is available, if the command wrote anything
        to its standard error.
    """
    if isinstance(cmd, basestring):
        cmd = cmd.split()

    # Under Python 2.4+, use subprocess
    try:
        from subprocess import Popen, PIPE
        pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() writes data to the child's stdin and collects
        # both output streams.
        out, err = pipe.communicate(data)
        # NOTE(review): Popen objects presumably always have a
        # returncode attribute, which would make the else branch
        # below unreachable -- confirm before relying on it.
        if hasattr(pipe, 'returncode'):
            if pipe.returncode == 0:
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
        else:
            # Assume that there was an error iff anything was written
            # to the child's stderr.
            if err == '':
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
    except ImportError:
        # No subprocess module: fall through to the older interfaces.
        pass

    # Under Python 2.3 or earlier, on unix, use popen2.Popen3 so we
    # can access the return value.
    import popen2
    if hasattr(popen2, 'Popen3'):
        # The command list is joined into a single string with no
        # quoting, so arguments containing spaces are not preserved.
        pipe = popen2.Popen3(' '.join(cmd), True)
        to_child = pipe.tochild
        from_child = pipe.fromchild
        child_err = pipe.childerr
        if data:
            to_child.write(data)
        to_child.close()
        out = err = ''
        # NOTE(review): the popen2 documentation describes poll() as
        # returning -1 (not None) while the child is still running;
        # if so, this loop body never executes and the unconditional
        # reads below do all of the work -- verify.
        while pipe.poll() is None:
            out += from_child.read()
            err += child_err.read()
        out += from_child.read()
        err += child_err.read()
        if pipe.wait() == 0:
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on non-unix, use os.popen3
    else:
        to_child, from_child, child_err = os.popen3(' '.join(cmd), 'b')
        if data:
            try:
                to_child.write(data)
            # Guard for a broken pipe error
            except IOError, e:
                # Re-raised as OSError, the exception type that this
                # function documents.
                raise OSError(e)
        to_child.close()
        out = from_child.read()
        err = child_err.read()
        # Assume that there was an error iff anything was written
        # to the child's stderr.
        if err == '':
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)