| # $Id: peps.py 4564 2006-05-21 20:44:42Z wiemann $ |
| # Author: David Goodger <[email protected]> |
| # Copyright: This module has been placed in the public domain. |
| |
| """ |
| Transforms for PEP processing. |
| |
| - `Headers`: Used to transform a PEP's initial RFC-2822 header. It remains a |
| field list, but some entries get processed. |
| - `Contents`: Auto-inserts a table of contents. |
| - `PEPZero`: Special processing for PEP 0. |
| """ |
| |
| __docformat__ = 'reStructuredText' |
| |
| import sys |
| import os |
| import re |
| import time |
| from docutils import nodes, utils, languages |
| from docutils import ApplicationError, DataError |
| from docutils.transforms import Transform, TransformError |
| from docutils.transforms import parts, references, misc |
| |
| |
class Headers(Transform):

    """
    Process fields in a PEP's initial RFC-2822 header.

    The header remains a field list.  The "PEP" field is validated, and the
    "Author", "Discussions-To", "Replaces", "Replaced-By", "Requires",
    "Last-Modified", "Content-Type" and "Version" fields are rewritten in
    place (email masking, hyperlinks, RCS keyword clean-up).
    """

    default_priority = 360

    # Relative URL of a PEP; appended to the ``pep_base_url`` setting.
    pep_url = 'pep-%04d'
    # URL of a PEP's source text, used as the "Last-Modified" link target.
    pep_cvs_url = ('http://svn.python.org/view/*checkout*'
                   '/peps/trunk/pep-%04d.txt')
    # (pattern, replacement) pairs passed to `utils.clean_rcs_keywords`.
    # NOTE(review): the first pattern is split into two literals, presumably
    # so that version control does not expand the keyword in this file.
    rcs_keyword_substitutions = (
          (re.compile(r'\$' r'RCSfile: (.+),v \$$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$$'), r'\1'),)

    def apply(self):
        """
        Validate the PEP header and rewrite its fields in place.

        Raises `DataError` if the document is empty, does not begin with an
        "rfc2822" field list, has no "PEP" field, does not have "Title" as
        its second field, or contains a field body that is anything other
        than a single paragraph.
        """
        # Deliberately ``len()``: do not rely on the truth value of a node.
        if not len(self.document):
            # @@@ replace these DataErrors with proper system messages
            raise DataError('Document tree is empty.')
        header = self.document[0]
        if not isinstance(header, nodes.field_list) or \
              'rfc2822' not in header['classes']:
            raise DataError('Document does not begin with an RFC-2822 '
                            'header; it is not a PEP.')
        pep = None
        cvs_url = None
        for field in header:
            if field[0].astext().lower() == 'pep': # should be the first field
                value = field[1].astext()
                try:
                    pep = int(value)
                    cvs_url = self.pep_cvs_url % pep
                except ValueError:
                    # Keep the raw text as the PEP "number", report the
                    # problem, and mark the offending value in the header.
                    pep = value
                    cvs_url = None
                    msg = self.document.reporter.warning(
                        '"PEP" header must contain an integer; "%s" is an '
                        'invalid value.' % pep, base_node=field)
                    msgid = self.document.set_id(msg)
                    prb = nodes.problematic(value, value or '(none)',
                                            refid=msgid)
                    prbid = self.document.set_id(prb)
                    msg.add_backref(prbid)
                    if len(field[1]):
                        field[1][0][:] = [prb]
                    else:
                        field[1] += nodes.paragraph('', '', prb)
                break
        if pep is None:
            raise DataError('Document does not contain an RFC-2822 "PEP" '
                            'header.')
        if pep == 0:
            # Special processing for PEP 0.
            pending = nodes.pending(PEPZero)
            self.document.insert(1, pending)
            self.document.note_pending(pending)
        if len(header) < 2 or header[1][0].astext().lower() != 'title':
            raise DataError('No title!')
        for field in header:
            name = field[0].astext().lower()
            body = field[1]
            if len(body) > 1:
                raise DataError('PEP header field body contains multiple '
                                'elements:\n%s' % field.pformat(level=1))
            elif len(body) == 1:
                if not isinstance(body[0], nodes.paragraph):
                    raise DataError('PEP header field body may only contain '
                                    'a single paragraph:\n%s'
                                    % field.pformat(level=1))
            elif name == 'last-modified':
                # Empty "Last-Modified": fall back to the source file's
                # modification time (index 8 of the stat tuple is st_mtime).
                try:
                    date = time.strftime(
                        '%d-%b-%Y',
                        time.localtime(os.stat(self.document['source'])[8]))
                except OSError:
                    # The source is not a file that can be stat'ed.
                    date = 'unknown'
                if cvs_url:
                    body += nodes.paragraph(
                        '', '', nodes.reference('', date, refuri=cvs_url))
                else:
                    # Nothing was added, so the body is still empty and
                    # there is no paragraph to process below.
                    continue
            else:
                # empty
                continue
            para = body[0]
            if name == 'author':
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(mask_email(node))
            elif name == 'discussions-to':
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(mask_email(node, pep))
            elif name in ('replaces', 'replaced-by', 'requires'):
                # Turn each listed PEP number into a link to that PEP.
                newbody = []
                space = nodes.Text(' ')
                for refpep in re.split(r',?\s+', body.astext()):
                    pepno = int(refpep)
                    newbody.append(nodes.reference(
                        refpep, refpep,
                        refuri=(self.document.settings.pep_base_url
                                + self.pep_url % pepno)))
                    newbody.append(space)
                para[:] = newbody[:-1] # drop trailing space
            elif name == 'last-modified':
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
                if cvs_url:
                    date = para.astext()
                    para[:] = [nodes.reference('', date, refuri=cvs_url)]
            elif name == 'content-type':
                # The content type links to PEP 12.
                pep_type = para.astext()
                uri = self.document.settings.pep_base_url + self.pep_url % 12
                para[:] = [nodes.reference('', pep_type, refuri=uri)]
            elif name == 'version' and len(body):
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
| |
| |
class Contents(Transform):

    """
    Add an empty "contents" topic, holding a pending `parts.Contents`
    transform, to the document right after the RFC 2822 header.
    """

    default_priority = 380

    def apply(self):
        """Build the contents topic and insert it at document index 1."""
        document = self.document
        labels = languages.get_language(
            document.settings.language_code).labels
        heading = labels['contents']
        topic = nodes.topic('', nodes.title('', heading),
                            classes=['contents'])
        # Claim the normalized heading as the topic's name only if no other
        # element in the document already uses it.
        normalized = nodes.fully_normalize_name(heading)
        if not document.has_name(normalized):
            topic['names'].append(normalized)
        document.note_implicit_target(topic)
        placeholder = nodes.pending(parts.Contents)
        topic += placeholder
        document.insert(1, topic)
        document.note_pending(placeholder)
| |
| |
class TargetNotes(Transform):

    """
    Locate the "References" section, insert a placeholder for an external
    target footnote insertion transform at the end, and schedule the
    transform to run immediately.
    """

    default_priority = 520

    def apply(self):
        """
        Append pending transforms to the "References" section.

        Trailing sections are scanned from the end of the document.  If none
        of them has "references" in its title, a new "References" section is
        created: before the "Copyright" section when one was seen, otherwise
        at the end of the document.  Two pending nodes are then appended to
        the section: `references.TargetNotes` and a `misc.CallBack` that
        invokes `cleanup_callback`.
        """
        doc = self.document
        i = len(doc) - 1
        refsect = None
        copyright_index = None
        while i >= 0 and isinstance(doc[i], nodes.section):
            title_words = doc[i][0].astext().lower().split()
            if 'references' in title_words:
                refsect = doc[i]
                break
            elif 'copyright' in title_words:
                copyright_index = i
            i -= 1
        if refsect is None:
            refsect = nodes.section()
            refsect += nodes.title('', 'References')
            doc.set_id(refsect)
            # Compare against None: index 0 is a valid position too.
            if copyright_index is not None:
                # Put the new "References" section before "Copyright":
                doc.insert(copyright_index, refsect)
            else:
                # Put the new "References" section at end of doc:
                doc.append(refsect)
        pending = nodes.pending(references.TargetNotes)
        refsect.append(pending)
        self.document.note_pending(pending, 0)
        pending = nodes.pending(misc.CallBack,
                                details={'callback': self.cleanup_callback})
        refsect.append(pending)
        self.document.note_pending(pending, 1)

    def cleanup_callback(self, pending):
        """
        Remove an empty "References" section.

        Called after the `references.TargetNotes` transform is complete.
        """
        if len(pending.parent) == 2:    # <title> and <pending>
            pending.parent.parent.remove(pending.parent)
| |
| |
class PEPZero(Transform):

    """
    Special processing for PEP 0.
    """

    default_priority = 760

    def apply(self):
        """Walk the document with `PEPZeroSpecial`, then drop the placeholder."""
        self.document.walk(PEPZeroSpecial(self.document))
        placeholder = self.startnode
        placeholder.parent.remove(placeholder)
| |
| |
class PEPZeroSpecial(nodes.SparseNodeVisitor):

    """
    Visitor implementing the PEP 0 special cases:

    - Email addresses in references are masked.

    - In 4-column tables, a PEP number in the second column becomes a link
      to that PEP.
    """

    pep_url = Headers.pep_url

    def unknown_visit(self, node):
        """Ignore node types without a dedicated ``visit_`` method."""
        pass

    def visit_reference(self, node):
        """Replace the reference with its email-masked form."""
        masked = mask_email(node)
        node.replace_self(masked)

    def visit_field_list(self, node):
        """Skip the RFC-2822 header field list."""
        if 'rfc2822' in node['classes']:
            raise nodes.SkipNode

    def visit_tgroup(self, node):
        """Note whether this is a 4-column table; reset the column counter."""
        self.pep_table = node['cols'] == 4
        self.entry = 0

    def visit_colspec(self, node):
        """Give the second column of a 4-column table the "num" class."""
        self.entry += 1
        if self.pep_table and self.entry == 2:
            node['classes'].append('num')

    def visit_row(self, node):
        """Restart the column counter for each row."""
        self.entry = 0

    def visit_entry(self, node):
        """Link the PEP number found in the second cell of a 4-column table."""
        self.entry += 1
        if not (self.pep_table and self.entry == 2 and len(node) == 1):
            return
        node['classes'].append('num')
        paragraph = node[0]
        if not isinstance(paragraph, nodes.paragraph) or len(paragraph) != 1:
            return
        label = paragraph.astext()
        try:
            number = int(label)
            target = (self.document.settings.pep_base_url
                      + self.pep_url % number)
            paragraph[0] = nodes.reference(label, label, refuri=target)
        except ValueError:
            # Not a PEP number: leave the cell as it is.
            pass
| |
| |
# Addresses that `mask_email` leaves unmasked.
non_masked_addresses = ('[email protected]',
                        '[email protected]',
                        '[email protected]')

def mask_email(ref, pepno=None):
    """
    Mask the email address in `ref` and return a replacement node.

    `ref` is returned unchanged if it contains no email address, i.e. if it
    has no "refuri" attribute or the URI does not start with "mailto:".

    For email addresses such as "user@host", mask the address as "user at
    host" (text) to thwart simple email address harvesters (except for those
    listed in `non_masked_addresses`).  If a PEP number (`pepno`) is given,
    return a reference including a default email subject.
    """
    scheme = 'mailto:'
    if not (ref.hasattr('refuri') and ref['refuri'].startswith(scheme)):
        return ref
    # Strip exactly the scheme prefix so the whole address is compared.
    if ref['refuri'][len(scheme):] in non_masked_addresses:
        replacement = ref[0]
    else:
        replacement_text = ref.astext().replace('@', ' at ')
        replacement = nodes.raw('', replacement_text, format='html')
    if pepno is None:
        return replacement
    # Keep the link, add a default subject, and show the replacement as its
    # only child.
    ref['refuri'] += '?subject=PEP%%20%s' % pepno
    ref[:] = [replacement]
    return ref