python-3.6.zip added from GitHub

README.cosmo contains the necessary links.
2025-06-04 03:32:27 +00:00 · 2021-08-08 09:38:33 +05:30 · 2021-08-08 09:38:33 +05:30 · 0c4c56ff39
commit 0c4c56ff39
parent 75fc601ff5
4219 changed files with 1968626 additions and 0 deletions
--- a/third_party/python/Doc/tools/extensions/c_annotations.py
+++ b/third_party/python/Doc/tools/extensions/c_annotations.py
@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+"""
+    c_annotations.py
+    ~~~~~~~~~~~~~~~~
+
+    Supports annotations for C API elements:
+
+    * reference count annotations for C API functions.  Based on
+      refcount.py and anno-api.py in the old Python documentation tools.
+
+    * stable API annotations
+
+    Usage: Set the `refcount_file` config value to the path to the reference
+    count data file.
+
+    :copyright: Copyright 2007-2014 by Georg Brandl.
+    :license: Python license.
+"""
+
+from os import path
+from docutils import nodes
+from docutils.parsers.rst import directives
+
+from sphinx import addnodes
+from sphinx.domains.c import CObject
+
+
+class RCEntry:
+    """Reference-count record for a single C API function.
+
+    Holds the function name, the list of ``(arg, type, refcount)`` tuples
+    collected for its parameters, and the type and refcount of its result.
+    """
+
+    def __init__(self, name):
+        # Result information is unknown until a result line is parsed.
+        self.result_refs = None
+        self.result_type = ''
+        # One tuple per documented parameter, in file order.
+        self.args = []
+        self.name = name
+
+
+class Annotations(dict):
+    """Mapping of C function name to its ``RCEntry``, plus the doctree hook
+    that turns those entries into visible annotations."""
+
+    @classmethod
+    def fromfile(cls, filename):
+        """Parse the refcount data file *filename* into a new mapping.
+
+        Every line that is not blank and does not start with ``#`` must hold
+        five colon-separated fields: ``function:type:arg:refcount:comment``.
+        A line with an empty ``arg`` describes the function's result; any
+        other line describes one parameter.  An empty or ``null`` refcount
+        is stored as ``None``.
+
+        Raises ValueError if a line has fewer than five fields or its
+        refcount field is not an integer.
+        """
+        d = cls()
+        with open(filename, 'r') as fp:
+            for line in fp:
+                line = line.strip()
+                if line[:1] in ("", "#"):
+                    # blank lines and comments
+                    continue
+                parts = line.split(":", 4)
+                if len(parts) != 5:
+                    raise ValueError("Wrong field count in %r" % line)
+                # Named ``ctype`` so the builtin ``type`` is not shadowed.
+                function, ctype, arg, refcount, comment = parts
+                # Get the entry, creating it if needed:
+                try:
+                    entry = d[function]
+                except KeyError:
+                    entry = d[function] = RCEntry(function)
+                if not refcount or refcount == "null":
+                    refcount = None
+                else:
+                    refcount = int(refcount)
+                # Update the entry with the new parameter or the result
+                # information.
+                if arg:
+                    entry.args.append((arg, ctype, refcount))
+                else:
+                    entry.result_type = ctype
+                    entry.result_refs = refcount
+        return d
+
+    def add_annotations(self, app, doctree):
+        """``doctree-read`` handler: annotate C descriptions in *doctree*.
+
+        For every description in the ``c`` domain, a "Part of the stable
+        ABI." note is inserted when its ``stableabi`` attribute is true.
+        For C functions whose recorded result type is ``PyObject*`` or
+        ``PyVarObject*``, a "Return value: ..." note derived from the
+        recorded result refcount is inserted as well.
+        """
+        for node in doctree.traverse(addnodes.desc_content):
+            par = node.parent
+            if par['domain'] != 'c':
+                continue
+            # .get() treats a description that never received the attribute
+            # as "not stable ABI" instead of raising KeyError.
+            if par.get('stableabi'):
+                node.insert(0, nodes.emphasis(' Part of the stable ABI.',
+                                              ' Part of the stable ABI.',
+                                              classes=['stableabi']))
+            if par['objtype'] != 'function':
+                continue
+            # One lookup covers both "attribute missing" and "attribute
+            # empty"; it replaces the legacy has_key() spelling.
+            names = par[0].get('names')
+            if not names:
+                continue
+            name = names[0]
+            if name.startswith("c."):
+                name = name[2:]
+            entry = self.get(name)
+            if not entry:
+                continue
+            if entry.result_type not in ("PyObject*", "PyVarObject*"):
+                continue
+            if entry.result_refs is None:
+                rc = 'Return value: Always NULL.'
+            elif entry.result_refs:
+                rc = 'Return value: New reference.'
+            else:
+                rc = 'Return value: Borrowed reference.'
+            node.insert(0, nodes.emphasis(rc, rc, classes=['refcount']))
+
+
+def init_annotations(app):
+    """Load the refcount data file and hook the annotator into the build.
+
+    The file named by the ``refcount_file`` config value is resolved
+    relative to the source directory.
+    """
+    refcount_path = path.join(app.srcdir, app.config.refcount_file)
+    annotations = Annotations.fromfile(refcount_path)
+    app.connect('doctree-read', annotations.add_annotations)
+
+
+def setup(app):
+    """Sphinx extension entry point.
+
+    Registers the ``refcount_file`` config value, connects
+    ``init_annotations`` to the ``builder-inited`` event and patches
+    ``CObject`` so that C directives accept a ``stableabi`` flag.
+    Returns the extension metadata dict.
+    """
+    app.add_config_value('refcount_file', '', True)
+    app.connect('builder-inited', init_annotations)
+
+    # monkey-patch C object...
+    # NOTE: option_spec is replaced wholesale, so only these two options
+    # are declared on CObject afterwards.
+    CObject.option_spec = {
+        'noindex': directives.flag,
+        'stableabi': directives.flag,
+    }
+    old_handle_signature = CObject.handle_signature
+    def new_handle_signature(self, sig, signode):
+        # Record on the signature's parent node whether the option was
+        # given; Annotations.add_annotations reads this attribute.
+        signode.parent['stableabi'] = 'stableabi' in self.options
+        return old_handle_signature(self, sig, signode)
+    CObject.handle_signature = new_handle_signature
+    return {'version': '1.0', 'parallel_read_safe': True}
--- a/third_party/python/Doc/tools/extensions/escape4chm.py
+++ b/third_party/python/Doc/tools/extensions/escape4chm.py
@ -0,0 +1,60 @@
+"""
+Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
+effect on some MBCS Windows systems.
+
+https://bugs.python.org/issue32174
+"""
+
+import re
+from html.entities import codepoint2name
+
+try:  # sphinx>=1.6
+    from sphinx.util.logging import getLogger
+except ImportError:  # sphinx<1.6
+    from logging import getLogger
+
+# escape the characters which codepoint > 0x7F
+def _process(string):
+    """Return *string* with every non-ASCII character replaced by an HTML
+    entity: the named entity when ``codepoint2name`` knows one, otherwise a
+    decimal numeric character reference."""
+    def to_entity(match):
+        code = ord(match.group(0))
+        try:
+            return '&%s;' % codepoint2name[code]
+        except KeyError:
+            return '&#%d;' % code
+
+    return re.sub(r'[^\x00-\x7F]', to_entity, string)
+
+def escape_for_chm(app, pagename, templatename, context, doctree):
+    """``html-page-context`` handler: for the ``htmlhelp`` builder, rewrite
+    ``context['body']`` (when present) to 7-bit ASCII."""
+    builder_name = getattr(app.builder, 'name', '')
+    # only works for .chm output
+    if builder_name == 'htmlhelp':
+        # escape the `body` part to 7-bit ASCII
+        page_body = context.get('body')
+        if page_body is not None:
+            context['body'] = _process(page_body)
+
+def fixup_keywords(app, exception):
+    """``build-finished`` handler: in the generated ``.hhk`` keywords file,
+    replace every ``&#x27;`` with ``&#39;``.
+
+    Does nothing unless the builder is ``htmlhelp`` and the build finished
+    without an exception.
+    """
+    builder = app.builder
+    # only works for .chm output
+    if getattr(builder, 'name', '') != 'htmlhelp':
+        return
+    if exception:
+        return
+
+    getLogger(__name__).info('fixing HTML escapes in keywords file...')
+    outdir = builder.outdir
+    keywords_name = builder.config.htmlhelp_basename + '.hhk'
+    with builder.open_file(outdir, keywords_name, 'r') as f:
+        original = f.read()
+    fixed = original.replace('&#x27;', '&#39;')
+    with builder.open_file(outdir, keywords_name, 'w') as f:
+        f.write(fixed)
+
+def setup(app):
+    """Sphinx extension entry point: connect the two event handlers of this
+    module and return the extension metadata dict."""
+    # `html-page-context` event emitted when the HTML builder has
+    # created a context dictionary to render a template with.
+    app.connect('html-page-context', escape_for_chm)
+    # `build-finished` event emitted when all the files have been
+    # output.
+    app.connect('build-finished', fixup_keywords)
+
+    return {'version': '1.0', 'parallel_read_safe': True}
--- a/third_party/python/Doc/tools/extensions/patchlevel.py
+++ b/third_party/python/Doc/tools/extensions/patchlevel.py
@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+"""
+    patchlevel.py
+    ~~~~~~~~~~~~~
+
+    Extract version info from Include/patchlevel.h.
+    Adapted from Doc/tools/getversioninfo.
+
+    :copyright: 2007-2008 by Georg Brandl.
+    :license: Python license.
+"""
+
+from __future__ import print_function
+
+import os
+import re
+import sys
+
+def get_header_version_info(srcdir):
+    """Return ``(version, release)`` parsed from ``Include/patchlevel.h``.
+
+    The header is looked up one directory above *srcdir*.  ``version`` is
+    ``MAJOR.MINOR``; ``release`` adds the micro version and, for non-final
+    releases, a level suffix (``a``, ``b`` or ``rc``) plus the serial.
+
+    Raises KeyError if a required ``#define`` is missing or the release
+    level is not one of the known names.
+    """
+    header_path = os.path.join(srcdir, '..', 'Include', 'patchlevel.h')
+
+    # This won't pick out all #defines, but it will pick up the ones we
+    # care about.
+    define_rx = re.compile(r'\s*#define\s+([a-zA-Z][a-zA-Z_0-9]*)\s+([a-zA-Z_0-9]+)')
+
+    defines = {}
+    with open(header_path) as header:
+        for line in header:
+            found = define_rx.match(line)
+            if found is None:
+                continue
+            defines[found.group(1)] = found.group(2)
+
+    version = '%s.%s' % (defines['PY_MAJOR_VERSION'], defines['PY_MINOR_VERSION'])
+    release = version + '.' + str(int(defines['PY_MICRO_VERSION']))
+
+    level = defines['PY_RELEASE_LEVEL']
+    if level != 'PY_RELEASE_LEVEL_FINAL':
+        suffix = {
+            'PY_RELEASE_LEVEL_ALPHA': 'a',
+            'PY_RELEASE_LEVEL_BETA':  'b',
+            'PY_RELEASE_LEVEL_GAMMA': 'rc',
+        }[level]
+        release += suffix + str(int(defines['PY_RELEASE_SERIAL']))
+    return version, release
+
+
+def get_sys_version_info():
+    """Return ``(version, release)`` for the running interpreter.
+
+    ``version`` is ``MAJOR.MINOR``; ``release`` adds the micro version and,
+    for non-final releases, a level suffix plus the serial.  The suffixes
+    match the ones ``get_header_version_info`` produces (``a``, ``b``,
+    ``rc``), so a release candidate is reported as e.g. ``3.6.0rc1``
+    rather than ``3.6.0c1``.
+    """
+    major, minor, micro, level, serial = sys.version_info
+    release = version = '%s.%s' % (major, minor)
+    release += '.%s' % micro
+    if level != 'final':
+        suffixes = {'alpha': 'a', 'beta': 'b', 'candidate': 'rc'}
+        # Unknown level names fall back to their first letter.
+        release += '%s%s' % (suffixes.get(level, level[0]), serial)
+    return version, release
+
+
+def get_version_info():
+    """Return ``(version, release)``, preferring ``Include/patchlevel.h``.
+
+    When the header cannot be read, a notice is printed to stderr and the
+    running interpreter's own version is returned instead.
+    """
+    try:
+        info = get_header_version_info('.')
+    except (IOError, OSError):
+        info = get_sys_version_info()
+        print('Can\'t get version info from Include/patchlevel.h, ' \
+              'using version of this interpreter (%s).' % info[1], file=sys.stderr)
+    return info
+
+if __name__ == '__main__':
+    # Run as a script: print the release string parsed from the header
+    # (no fallback to the interpreter version here).
+    print(get_header_version_info('.')[1])
--- a/third_party/python/Doc/tools/extensions/pyspecific.py
+++ b/third_party/python/Doc/tools/extensions/pyspecific.py
@ -0,0 +1,406 @@
+# -*- coding: utf-8 -*-
+"""
+    pyspecific.py
+    ~~~~~~~~~~~~~
+
+    Sphinx extension with Python doc-specific markup.
+
+    :copyright: 2008-2014 by Georg Brandl.
+    :license: Python license.
+"""
+
+import re
+import codecs
+from os import getenv, path
+from time import asctime
+from pprint import pformat
+from docutils.io import StringOutput
+from docutils.parsers.rst import Directive
+from docutils.utils import new_document
+
+from docutils import nodes, utils
+
+from sphinx import addnodes
+from sphinx.builders import Builder
+from sphinx.locale import translators
+from sphinx.util.nodes import split_explicit_title
+from sphinx.writers.html import HTMLTranslator
+from sphinx.writers.text import TextWriter, TextTranslator
+from sphinx.writers.latex import LaTeXTranslator
+from sphinx.domains.python import PyModulelevel, PyClassmember
+
+# Support for checking for suspicious markup
+
+import suspicious
+
+
+# URI templates; the ``%s`` is filled in by issue_role() and source_role().
+ISSUE_URI = 'https://bugs.python.org/issue%s'
+SOURCE_URI = 'https://github.com/python/cpython/tree/3.6/%s'
+
+# monkey-patch reST parser to disable alphabetic and roman enumerated lists
+# (each of the four converters is replaced by one that always returns None)
+from docutils.parsers.rst.states import Body
+Body.enum.converters['loweralpha'] = \
+    Body.enum.converters['upperalpha'] = \
+    Body.enum.converters['lowerroman'] = \
+    Body.enum.converters['upperroman'] = lambda x: None
+
+# monkey-patch HTML and LaTeX translators to keep doctest blocks in the
+# doctest docs themselves
+# Note the asymmetry: the HTML translator's *visit* method is wrapped,
+# the LaTeX translator's *depart* method.
+orig_visit_literal_block = HTMLTranslator.visit_literal_block
+orig_depart_literal_block = LaTeXTranslator.depart_literal_block
+
+
+def new_visit_literal_block(self, node):
+    """Wrapper for ``HTMLTranslator.visit_literal_block``.
+
+    While the original method runs, doctest-flag trimming is switched off
+    if the current document's metadata contains ``keepdoctest``; the
+    previous setting is always restored afterwards.
+    """
+    docname = self.builder.current_docname
+    meta = self.builder.env.metadata[docname]
+    saved_setting = self.highlighter.trim_doctest_flags
+    if 'keepdoctest' in meta:
+        self.highlighter.trim_doctest_flags = False
+    try:
+        orig_visit_literal_block(self, node)
+    finally:
+        self.highlighter.trim_doctest_flags = saved_setting
+
+
+def new_depart_literal_block(self, node):
+    """Wrapper for ``LaTeXTranslator.depart_literal_block``.
+
+    While the original method runs, doctest-flag trimming is switched off
+    if the metadata of the file on top of ``curfilestack`` contains
+    ``keepdoctest``; the previous setting is always restored afterwards.
+    """
+    current_file = self.curfilestack[-1]
+    meta = self.builder.env.metadata[current_file]
+    saved_setting = self.highlighter.trim_doctest_flags
+    if 'keepdoctest' in meta:
+        self.highlighter.trim_doctest_flags = False
+    try:
+        orig_depart_literal_block(self, node)
+    finally:
+        self.highlighter.trim_doctest_flags = saved_setting
+
+
+# Install the wrappers in place of the translators' original methods.
+HTMLTranslator.visit_literal_block = new_visit_literal_block
+LaTeXTranslator.depart_literal_block = new_depart_literal_block
+
+
+# Support for marking up and linking to bugs.python.org issues
+
+def issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]):
+    """Role function: render the issue number in *text* as a ``bpo-N``
+    reference whose target is built from ``ISSUE_URI``.
+
+    Returns ``([reference_node], [])``.
+    """
+    issue_number = utils.unescape(text)
+    label = 'bpo-' + issue_number
+    link = nodes.reference(label, label, refuri=ISSUE_URI % issue_number)
+    return [link], []
+
+
+# Support for linking to Python source files easily
+
+def source_role(typ, rawtext, text, lineno, inliner, options={}, content=[]):
+    """Role function: link to a source file via ``SOURCE_URI``.
+
+    *text* is split with ``split_explicit_title`` into a title and a
+    target; the target is substituted into the URI template.
+    Returns ``([reference_node], [])``.
+    """
+    _has_title, raw_title, raw_target = split_explicit_title(text)
+    title = utils.unescape(raw_title)
+    uri = SOURCE_URI % utils.unescape(raw_target)
+    return [nodes.reference(title, title, refuri=uri)], []
+
+
+# Support for marking up implementation details
+
+class ImplementationDetail(Directive):
+    """Directive producing a compound node (class ``impl-detail``) that
+    starts with a bold, translated "CPython implementation detail:" label.
+
+    Accepts one optional argument, parsed as inline text, and optional
+    content.
+    """
+
+    has_content = True
+    required_arguments = 0
+    optional_arguments = 1
+    final_argument_whitespace = True
+
+    # This text is copied to templates/dummy.html
+    label_text = 'CPython implementation detail:'
+
+    def run(self):
+        """Build the compound node and return it in a one-element list."""
+        pnode = nodes.compound(classes=['impl-detail'])
+        label = translators['sphinx'].gettext(self.label_text)
+        content = self.content
+        add_text = nodes.strong(label, label)
+        if self.arguments:
+            # The optional argument becomes the first paragraph.
+            n, m = self.state.inline_text(self.arguments[0], self.lineno)
+            pnode.append(nodes.paragraph('', '', *(n + m)))
+        self.state.nested_parse(content, self.content_offset, pnode)
+        if pnode.children and isinstance(pnode[0], nodes.paragraph):
+            # Move the first paragraph's children into a translatable inline
+            # node inside a fresh non-translatable paragraph, then put the
+            # label and a space in front of it.
+            content = nodes.inline(pnode[0].rawsource, translatable=True)
+            content.source = pnode[0].source
+            content.line = pnode[0].line
+            content += pnode[0].children
+            pnode[0].replace_self(nodes.paragraph('', '', content,
+                                                  translatable=False))
+            pnode[0].insert(0, add_text)
+            pnode[0].insert(1, nodes.Text(' '))
+        else:
+            # No leading paragraph: the label gets a paragraph of its own.
+            pnode.insert(0, nodes.paragraph('', '', add_text))
+        return [pnode]
+
+
+# Support for documenting decorators
+
+class PyDecoratorMixin(object):
+    """Mixin that prefixes the rendered signature with ``@`` and reports
+    that no argument list is needed."""
+
+    def needs_arglist(self):
+        return False
+
+    def handle_signature(self, sig, signode):
+        result = super(PyDecoratorMixin, self).handle_signature(sig, signode)
+        at_sign = addnodes.desc_addname('@', '@')
+        signode.insert(0, at_sign)
+        return result
+
+
+class PyDecoratorFunction(PyDecoratorMixin, PyModulelevel):
+    """Directive for a module-level decorator, handled as ``py:function``."""
+
+    def run(self):
+        # a decorator function is a function after all
+        self.name = 'py:function'
+        produced = PyModulelevel.run(self)
+        return produced
+
+
+class PyDecoratorMethod(PyDecoratorMixin, PyClassmember):
+    """Directive for a decorator that is a class member, handled as
+    ``py:method``."""
+
+    def run(self):
+        self.name = 'py:method'
+        produced = PyClassmember.run(self)
+        return produced
+
+
+class PyCoroutineMixin(object):
+    """Mixin that prefixes the rendered signature with a ``coroutine``
+    annotation."""
+
+    def handle_signature(self, sig, signode):
+        result = super(PyCoroutineMixin, self).handle_signature(sig, signode)
+        marker = addnodes.desc_annotation('coroutine ', 'coroutine ')
+        signode.insert(0, marker)
+        return result
+
+
+class PyCoroutineFunction(PyCoroutineMixin, PyModulelevel):
+    """Directive for a module-level coroutine, handled as ``py:function``."""
+
+    def run(self):
+        self.name = 'py:function'
+        produced = PyModulelevel.run(self)
+        return produced
+
+
+class PyCoroutineMethod(PyCoroutineMixin, PyClassmember):
+    """Directive for a coroutine method, handled as ``py:method``."""
+
+    def run(self):
+        self.name = 'py:method'
+        produced = PyClassmember.run(self)
+        return produced
+
+
+class PyAbstractMethod(PyClassmember):
+    """Directive for an abstract method: handled as ``py:method`` and
+    rendered with an ``abstractmethod`` annotation in front."""
+
+    def run(self):
+        self.name = 'py:method'
+        produced = PyClassmember.run(self)
+        return produced
+
+    def handle_signature(self, sig, signode):
+        result = super(PyAbstractMethod, self).handle_signature(sig, signode)
+        marker = addnodes.desc_annotation('abstractmethod ',
+                                          'abstractmethod ')
+        signode.insert(0, marker)
+        return result
+
+
+# Support for documenting version of removal in deprecations
+
+class DeprecatedRemoved(Directive):
+    """Directive producing a ``versionmodified`` node of type
+    ``deprecated-removed``.
+
+    Arguments: the version of deprecation, the version of removal, and an
+    optional explanation parsed as inline text.  Content is optional.
+    """
+    has_content = True
+    required_arguments = 2
+    optional_arguments = 1
+    final_argument_whitespace = True
+    option_spec = {}
+
+    # Label template; looked up through the 'sphinx' translator before use.
+    _label = 'Deprecated since version {deprecated}, will be removed in version {removed}'
+
+    def run(self):
+        """Build the node, register it with the environment and return it
+        followed by any system messages from inline parsing."""
+        node = addnodes.versionmodified()
+        node.document = self.state.document
+        node['type'] = 'deprecated-removed'
+        version = (self.arguments[0], self.arguments[1])
+        node['version'] = version
+        label = translators['sphinx'].gettext(self._label)
+        text = label.format(deprecated=self.arguments[0], removed=self.arguments[1])
+        if len(self.arguments) == 3:
+            # The optional third argument becomes the first paragraph.
+            inodes, messages = self.state.inline_text(self.arguments[2],
+                                                      self.lineno+1)
+            para = nodes.paragraph(self.arguments[2], '', *inodes, translatable=False)
+            node.append(para)
+        else:
+            messages = []
+        if self.content:
+            self.state.nested_parse(self.content, self.content_offset, node)
+        if len(node):
+            if isinstance(node[0], nodes.paragraph) and node[0].rawsource:
+                # Move the first paragraph's children into a translatable
+                # inline node inside a fresh non-translatable paragraph.
+                content = nodes.inline(node[0].rawsource, translatable=True)
+                content.source = node[0].source
+                content.line = node[0].line
+                content += node[0].children
+                node[0].replace_self(nodes.paragraph('', '', content, translatable=False))
+            # Prefix the first child with "<label>: ".
+            node[0].insert(0, nodes.inline('', '%s: ' % text,
+                                           classes=['versionmodified']))
+        else:
+            # Neither explanation nor content: emit "<label>." on its own.
+            para = nodes.paragraph('', '',
+                                   nodes.inline('', '%s.' % text,
+                                                classes=['versionmodified']),
+                                   translatable=False)
+            node.append(para)
+        env = self.state.document.settings.env
+        # Recorded under 'deprecated', keyed by the deprecation version only.
+        env.note_versionchange('deprecated', version[0], node, self.lineno)
+        return [node] + messages
+
+
+# Support for including Misc/NEWS
+
+# Matches "Issue #N", "issue #N" or "bpo-N"; group 1 is the number.
+issue_re = re.compile('(?:[Ii]ssue #|bpo-)([0-9]+)')
+# Matches a whole "What's New in X" line, case-insensitively, with an
+# optional trailing "?"; group 1 is X.
+whatsnew_re = re.compile(r"(?im)^what's new in (.*?)\??$")
+
+
+class MiscNews(Directive):
+    """Directive that splices a NEWS file into the document being parsed.
+
+    The single argument is the file name.  It is looked up in the directory
+    named by the ``PY_MISC_NEWS_DIR`` environment variable or, when that is
+    unset or empty, next to the source file containing the directive.
+    """
+    option_spec = {}
+    final_argument_whitespace = False
+    optional_arguments = 0
+    required_arguments = 1
+    has_content = False
+
+    def run(self):
+        """Insert the rewritten NEWS lines as parser input and return ``[]``;
+        if the file cannot be read, return a bold placeholder instead."""
+        fname = self.arguments[0]
+        source = self.state_machine.input_lines.source(
+            self.lineno - self.state_machine.input_offset - 1)
+        source_dir = getenv('PY_MISC_NEWS_DIR') or path.dirname(path.abspath(source))
+        fpath = path.join(source_dir, fname)
+        self.state.document.settings.record_dependencies.add(fpath)
+        try:
+            with codecs.open(fpath, encoding='utf-8') as fp:
+                content = fp.read()
+        except Exception:
+            placeholder = 'The NEWS file is not available.'
+            return [nodes.strong(placeholder, placeholder)]
+        # Turn issue mentions into links and strip "What's New in" wording.
+        content = issue_re.sub(r'`bpo-\1 <https://bugs.python.org/issue\1>`__',
+                               content)
+        content = whatsnew_re.sub(r'\1', content)
+        # remove first 3 lines as they are the main heading
+        body_lines = content.splitlines()[3:]
+        lines = ['.. default-role:: obj', ''] + body_lines
+        self.state_machine.insert_input(lines, fname)
+        return []
+
+
+# Support for building "topic help" for pydoc
+
+# Labels whose sections PydocTopicsBuilder renders to text; each one is
+# looked up in the 'std' domain's label table and skipped with a warning
+# when it is missing.
+pydoc_topic_labels = [
+    'assert', 'assignment', 'atom-identifiers', 'atom-literals',
+    'attribute-access', 'attribute-references', 'augassign', 'binary',
+    'bitwise', 'bltin-code-objects', 'bltin-ellipsis-object',
+    'bltin-null-object', 'bltin-type-objects', 'booleans',
+    'break', 'callable-types', 'calls', 'class', 'comparisons', 'compound',
+    'context-managers', 'continue', 'conversions', 'customization', 'debugger',
+    'del', 'dict', 'dynamic-features', 'else', 'exceptions', 'execmodel',
+    'exprlists', 'floating', 'for', 'formatstrings', 'function', 'global',
+    'id-classes', 'identifiers', 'if', 'imaginary', 'import', 'in', 'integers',
+    'lambda', 'lists', 'naming', 'nonlocal', 'numbers', 'numeric-types',
+    'objects', 'operator-summary', 'pass', 'power', 'raise', 'return',
+    'sequence-types', 'shifting', 'slicings', 'specialattrs', 'specialnames',
+    'string-methods', 'strings', 'subscriptions', 'truth', 'try', 'types',
+    'typesfunctions', 'typesmapping', 'typesmethods', 'typesmodules',
+    'typesseq', 'typesseq-mutable', 'unary', 'while', 'with', 'yield'
+]
+
+
+class PydocTopicsBuilder(Builder):
+    """Builder that renders the sections named in ``pydoc_topic_labels``
+    as text and writes them to ``topics.py`` as a ``topics`` dict."""
+    name = 'pydoc-topics'
+
+    default_translator_class = TextTranslator
+
+    def init(self):
+        self.secnumbers = {}
+        self.topics = {}
+
+    def get_outdated_docs(self):
+        return 'all pydoc topics'
+
+    def get_target_uri(self, docname, typ=None):
+        return ''  # no URIs
+
+    def write(self, *ignored):
+        """Render each known topic label into ``self.topics``; labels not
+        present in the documentation are reported and skipped."""
+        try:  # sphinx>=1.6
+            from sphinx.util import status_iterator
+        except ImportError:  # sphinx<1.6
+            status_iterator = self.status_iterator
+
+        writer = TextWriter(self)
+        progress = status_iterator(pydoc_topic_labels,
+                                   'building topics... ',
+                                   length=len(pydoc_topic_labels))
+        for label in progress:
+            known_labels = self.env.domaindata['std']['labels']
+            if label not in known_labels:
+                self.warn('label %r not in documentation' % label)
+                continue
+            docname, labelid, sectname = known_labels[label]
+            doctree = self.env.get_and_resolve_doctree(docname, self)
+            # Render just the labelled section in a fresh document.
+            document = new_document('<section node>')
+            document.append(doctree.ids[labelid])
+            destination = StringOutput(encoding='utf-8')
+            writer.write(document, destination)
+            self.topics[label] = writer.output
+
+    def finish(self):
+        """Write the collected topics to ``topics.py`` in the output
+        directory, UTF-8 encoded."""
+        target = path.join(self.outdir, 'topics.py')
+        with open(target, 'wb') as f:
+            f.write('# -*- coding: utf-8 -*-\n'.encode('utf-8'))
+            f.write(('# Autogenerated by Sphinx on %s\n' % asctime()).encode('utf-8'))
+            f.write(('topics = ' + pformat(self.topics) + '\n').encode('utf-8'))
+
+
+# Support for documenting Opcodes
+
+opcode_sig_re = re.compile(r'(\w+(?:\+\d)?)(?:\s*\((.*)\))?')
+
+
+def parse_opcode_signature(env, sig, signode):
+    """Transform an opcode signature into RST nodes.
+
+    Appends the opcode name (and, when the signature has a parenthesised
+    part, a one-parameter list) to *signode* and returns the stripped
+    opcode name.  Raises ValueError if *sig* does not match.
+    """
+    found = opcode_sig_re.match(sig)
+    if found is None:
+        raise ValueError
+    opname = found.group(1)
+    arglist = found.group(2)
+    signode += addnodes.desc_name(opname, opname)
+    if arglist is None:
+        return opname.strip()
+    params = addnodes.desc_parameterlist()
+    signode += params
+    params += addnodes.desc_parameter(arglist, arglist)
+    return opname.strip()
+
+
+# Support for documenting pdb commands
+
+pdbcmd_sig_re = re.compile(r'([a-z()!]+)\s*(.*)')
+
+# later...
+# pdbargs_tokens_re = re.compile(r'''[a-zA-Z]+  |  # identifiers
+#                                   [.,:]+     |  # punctuation
+#                                   [\[\]()]   |  # parens
+#                                   \s+           # whitespace
+#                                   ''', re.X)
+
+
+def parse_pdb_command(env, sig, signode):
+    """Transform a pdb command signature into RST nodes.
+
+    Appends the command name (and its arguments, if any) to *signode* and
+    returns the name with all parentheses removed.  Raises ValueError if
+    *sig* does not match.
+    """
+    found = pdbcmd_sig_re.match(sig)
+    if found is None:
+        raise ValueError
+    name = found.group(1)
+    args = found.group(2)
+    signode += addnodes.desc_name(name, name)
+    if args:
+        shown_args = ' '+args
+        signode += addnodes.desc_addname(shown_args, shown_args)
+    return name.replace('(', '').replace(')', '')
+
+
+def setup(app):
+    """Sphinx extension entry point: register the roles, directives,
+    builders and object types of this module and return the extension
+    metadata dict."""
+    app.add_role('issue', issue_role)
+    app.add_role('source', source_role)
+    app.add_directive('impl-detail', ImplementationDetail)
+    app.add_directive('deprecated-removed', DeprecatedRemoved)
+    app.add_builder(PydocTopicsBuilder)
+    # The suspicious-markup builder lives in the sibling ``suspicious`` module.
+    app.add_builder(suspicious.CheckSuspiciousMarkupBuilder)
+    app.add_object_type('opcode', 'opcode', '%s (opcode)', parse_opcode_signature)
+    app.add_object_type('pdbcommand', 'pdbcmd', '%s (pdb command)', parse_pdb_command)
+    # No signature parser is passed for 2to3 fixers.
+    app.add_object_type('2to3fixer', '2to3fixer', '%s (2to3 fixer)')
+    app.add_directive_to_domain('py', 'decorator', PyDecoratorFunction)
+    app.add_directive_to_domain('py', 'decoratormethod', PyDecoratorMethod)
+    app.add_directive_to_domain('py', 'coroutinefunction', PyCoroutineFunction)
+    app.add_directive_to_domain('py', 'coroutinemethod', PyCoroutineMethod)
+    app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod)
+    app.add_directive('miscnews', MiscNews)
+    return {'version': '1.0', 'parallel_read_safe': True}
--- a/third_party/python/Doc/tools/extensions/suspicious.py
+++ b/third_party/python/Doc/tools/extensions/suspicious.py
@ -0,0 +1,282 @@
+"""
+Try to detect suspicious constructs, resembling markup
+that has leaked into the final output.
+
+Suspicious lines are reported in a comma-separated-file,
+``suspicious.csv``, located in the output directory.
+
+The file is utf-8 encoded, and each line contains four fields:
+
+ * document name (normalized)
+ * line number in the source document
+ * problematic text
+ * complete line showing the problematic text in context
+
+It is common to find many false positives. To avoid reporting them
+again and again, they may be added to the ``ignored.csv`` file
+(located in the configuration directory). The file has the same
+format as ``suspicious.csv`` with a few differences:
+
+  - each line defines a rule; if the rule matches, the issue
+    is ignored.
+  - line number may be empty (that is, nothing between the
+    commas: ",,"). In this case, line numbers are ignored (the
+    rule matches anywhere in the file).
+  - the last field does not have to be a complete line; some
+    surrounding text (never more than a line) is enough for
+    context.
+
+Rules are processed sequentially. A rule matches when:
+
+ * document names are the same
+ * problematic texts are the same
+ * line numbers are close to each other (5 lines up or down)
+ * the rule text is completely contained into the source line
+
+The simplest way to create the ignored.csv file is by copying
+undesired entries from suspicious.csv (possibly trimming the last
+field.)
+
+Copyright 2009 Gabriel A. Genellina
+
+"""
+
+import os
+import re
+import csv
+import sys
+
+from docutils import nodes
+from sphinx.builders import Builder
+import sphinx.util
+
+try:  # sphinx>=1.6
+    from sphinx.util.logging import getLogger
+except ImportError:  # sphinx<1.6
+    from logging import getLogger
+
+
+# ``detect_all(text)`` is the bound ``finditer`` of the pattern below: it
+# yields one match object per suspicious fragment found in ``text``.
+detect_all = re.compile(r'''
+    ::(?=[^=])|            # two :: (but NOT ::=)
+    :[a-zA-Z][a-zA-Z0-9]+| # :foo
+    `|                     # ` (seldom used by itself)
+    (?<!\.)\.\.[ \t]*\w+:  # .. foo: (but NOT ... else:)
+    ''', re.UNICODE | re.VERBOSE).finditer
+
+# True on Python 3; selects between the text-mode and the bytes-mode file
+# handling in CheckSuspiciousMarkupBuilder.
+py3 = sys.version_info >= (3, 0)
+
+
+class Rule:
+    """One ignore rule: an issue in a document that should not be reported."""
+
+    def __init__(self, docname, lineno, issue, line):
+        """A rule for ignoring issues"""
+        # document to which this rule applies
+        self.docname = docname
+        # line number in the original source; this rule matches only near
+        # that.  None -> don't care
+        self.lineno = lineno
+        # the markup fragment that triggered this rule
+        self.issue = issue
+        # text of the container element (single line only)
+        self.line = line
+        # becomes True once the rule has matched an issue
+        self.used = False
+
+    def __repr__(self):
+        # The line number is deliberately left out (empty second field).
+        return '{0},,{1},{2}'.format(self.docname, self.issue, self.line)
+
+
+
+class dialect(csv.excel):
+    """The ``csv.excel`` dialect with a bare linefeed as row terminator."""
+    lineterminator = '\n'
+
+
+class CheckSuspiciousMarkupBuilder(Builder):
+    """
+    Checks for possibly invalid markup that may leak into the output.
+
+    Every issue that no ignore rule matches is appended to
+    ``suspicious.csv`` in the output directory, reported as a warning, and
+    makes the application's status code 1.
+    """
+    name = 'suspicious'
+    logger = getLogger("CheckSuspiciousMarkupBuilder")
+
+    def init(self):
+        """Create an empty ``suspicious.csv`` and load the ignore rules."""
+        # create output file
+        self.log_file_name = os.path.join(self.outdir, 'suspicious.csv')
+        open(self.log_file_name, 'w').close()
+        # load database of previously ignored issues
+        self.load_rules(os.path.join(os.path.dirname(__file__), '..',
+                                     'susp-ignored.csv'))
+
+    def get_outdated_docs(self):
+        return self.env.found_docs
+
+    def get_target_uri(self, docname, typ=None):
+        return ''
+
+    def prepare_writing(self, docnames):
+        pass
+
+    def write_doc(self, docname, doctree):
+        """Walk *doctree* with a ``SuspiciousVisitor``, checking its text."""
+        # set when any issue is encountered in this document
+        self.any_issue = False
+        self.docname = docname
+        visitor = SuspiciousVisitor(doctree, self)
+        doctree.walk(visitor)
+
+    def finish(self):
+        """Report the ignore rules that never matched anything."""
+        unused_rules = [rule for rule in self.rules if not rule.used]
+        if unused_rules:
+            self.warn('Found %s/%s unused rules:' %
+                      (len(unused_rules), len(self.rules)))
+            for rule in unused_rules:
+                self.logger.info(repr(rule))
+        return
+
+    def check_issue(self, line, lineno, issue):
+        """Report *issue* unless an ignore rule matches it."""
+        if not self.is_ignored(line, lineno, issue):
+            self.report_issue(line, lineno, issue)
+
+    def is_ignored(self, line, lineno, issue):
+        """Determine whether this issue should be ignored.
+
+        The first matching rule is marked as used.
+        """
+        docname = self.docname
+        for rule in self.rules:
+            if rule.docname != docname: continue
+            if rule.issue != issue: continue
+            # Both lines must match *exactly*. This is rather strict,
+            # and probably should be improved.
+            # Doing fuzzy matches with levenshtein distance could work,
+            # but that means bringing other libraries...
+            # Ok, relax that requirement: just check if the rule fragment
+            # is contained in the document line
+            if rule.line not in line: continue
+            # Check both line numbers. If they're "near"
+            # this rule matches. (lineno=None means "don't care")
+            if (rule.lineno is not None) and \
+                abs(rule.lineno - lineno) > 5: continue
+            # if it came this far, the rule matched
+            rule.used = True
+            return True
+        return False
+
+    def report_issue(self, text, lineno, issue):
+        """Log *issue* to ``suspicious.csv``, emit a warning and set the
+        application's status code to 1."""
+        if not self.any_issue:
+            # Empty status line before the first issue of a document.  A
+            # logger's info() requires a message, so pass '' explicitly
+            # (calling it with no argument raises TypeError).
+            self.logger.info('')
+        self.any_issue = True
+        self.write_log_entry(lineno, issue, text)
+        if py3:
+            self.warn('[%s:%d] "%s" found in "%-.120s"' %
+                      (self.docname, lineno, issue, text))
+        else:
+            self.warn('[%s:%d] "%s" found in "%-.120s"' % (
+                self.docname.encode(sys.getdefaultencoding(),'replace'),
+                lineno,
+                issue.encode(sys.getdefaultencoding(),'replace'),
+                text.strip().encode(sys.getdefaultencoding(),'replace')))
+        self.app.statuscode = 1
+
+    def write_log_entry(self, lineno, issue, text):
+        """Append one row (docname, lineno, issue, stripped text) to the
+        log file, UTF-8 encoded on both Python 2 and Python 3."""
+        # ``with`` closes the file even if writing the row fails.
+        if py3:
+            # Explicit encoding: the log is documented as UTF-8 and must
+            # not depend on the locale's default encoding.
+            with open(self.log_file_name, 'a', encoding='utf-8') as f:
+                writer = csv.writer(f, dialect)
+                writer.writerow([self.docname, lineno, issue, text.strip()])
+        else:
+            with open(self.log_file_name, 'ab') as f:
+                writer = csv.writer(f, dialect)
+                writer.writerow([self.docname.encode('utf-8'),
+                                 lineno,
+                                 issue.encode('utf-8'),
+                                 text.strip().encode('utf-8')])
+
+    def load_rules(self, filename):
+        """Load database of previously ignored issues.
+
+        A csv file, with exactly the same format as suspicious.csv
+        Fields: document name (normalized), line number, issue, surrounding text
+
+        A file that cannot be opened leaves the rule list empty.  Raises
+        ValueError if a row does not have exactly four fields or its line
+        number is not an integer.
+        """
+        self.logger.info("loading ignore rules... ", nonl=1)
+        self.rules = rules = []
+        try:
+            if py3:
+                # Read as UTF-8, matching the decoding done on Python 2.
+                f = open(filename, 'r', encoding='utf-8')
+            else:
+                f = open(filename, 'rb')
+        except IOError:
+            return
+        # ``with`` closes the file even when a malformed row raises.
+        with f:
+            for i, row in enumerate(csv.reader(f)):
+                if len(row) != 4:
+                    raise ValueError(
+                        "wrong format in %s, line %d: %s" % (filename, i+1, row))
+                docname, lineno, issue, text = row
+                if lineno:
+                    lineno = int(lineno)
+                else:
+                    lineno = None
+                if not py3:
+                    docname = docname.decode('utf-8')
+                    issue = issue.decode('utf-8')
+                    text = text.decode('utf-8')
+                rule = Rule(docname, lineno, issue, text)
+                rules.append(rule)
+        self.logger.info('done, %d rules loaded' % len(self.rules))
+
+
+def get_lineno(node):
+    """Obtain line number information for a node.
+
+    Walks up from *node*'s parent and returns the first ancestor ``line``
+    that is not None.  The node's own ``line`` is not consulted.  Returns
+    None when no ancestor carries a line number.
+    """
+    lineno = None
+    while lineno is None and node:
+        node = node.parent
+        if node is None:
+            # Walked past the root without finding a line number; without
+            # this check the next statement raises AttributeError.
+            break
+        lineno = node.line
+    return lineno
+
+
+def extract_line(text, index):
+    r"""text may be a multiline string; extract
+    only the line containing the given character index.
+
+    An index that points at a newline character selects the line which
+    that newline terminates.
+
+    >>> extract_line("abc\ndefgh\ni", 6)
+    'defgh'
+    >>> for i in (0, 2, 3, 4, 10):
+    ...   print(extract_line("abc\ndefgh\ni", i))
+    abc
+    abc
+    abc
+    defgh
+    i
+    """
+    # p: index just past the previous newline (0 when there is none)
+    p = text.rfind('\n', 0, index) + 1
+    # q: index of the next newline at or after ``index`` (end of text if none)
+    q = text.find('\n', index)
+    if q < 0:
+        q = len(text)
+    return text[p:q]
+
+
+class SuspiciousVisitor(nodes.GenericNodeVisitor):
+    """Visitor that scans text and image nodes for suspicious markup and
+    hands every finding to the builder's ``check_issue``."""
+
+    lastlineno = 0
+
+    def __init__(self, document, builder):
+        nodes.GenericNodeVisitor.__init__(self, document)
+        self.builder = builder
+
+    def default_visit(self, node):
+        # only direct text containers are inspected
+        if not isinstance(node, (nodes.Text, nodes.image)):
+            return
+        text = node.astext()
+        # lineno seems to go backwards sometimes (?)
+        lineno = max(get_lineno(node) or 0, self.lastlineno)
+        self.lastlineno = lineno
+        # don't report the same issue more than once per line
+        already_checked = set()
+        for match in detect_all(text):
+            issue = match.group()
+            line = extract_line(text, match.start())
+            key = (issue, line)
+            if key in already_checked:
+                continue
+            self.builder.check_issue(line, lineno, issue)
+            already_checked.add(key)
+
+    unknown_visit = default_visit
+
+    def visit_document(self, node):
+        self.lastlineno = 0
+
+    def visit_comment(self, node):
+        # ignore comments -- too many false positives.
+        # (although doing this could miss some errors;
+        # there were two sections "commented-out" by mistake
+        # in the Python docs that would not be caught)
+        raise nodes.SkipNode