python-3.6.zip added from GitHub

README.cosmo contains the necessary links.
ahgamut 2021-08-08 09:38:33 +05:30 committed by Justine Tunney
parent 75fc601ff5
commit 0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions

third_party/python/Parser/Python.asdl vendored Normal file

@@ -0,0 +1,132 @@
-- ASDL's 7 builtin types are:
-- identifier, int, string, bytes, object, singleton, constant
--
-- singleton: None, True or False
-- constant can be None, whereas None means "no value" for object.
module Python
{
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| ClassDef(identifier name,
expr* bases,
keyword* keywords,
stmt* body,
expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate a simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- XXX Jython will be different
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Await(expr value)
| Yield(expr? value)
| YieldFrom(expr value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
| FormattedValue(expr value, int? conversion, expr? format_spec)
| JoinedStr(expr* values)
| Bytes(bytes s)
| NameConstant(singleton value)
| Ellipsis
| Constant(constant value)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, slice slice, expr_context ctx)
| Starred(expr value, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
slice = Slice(expr? lower, expr? upper, expr? step)
| ExtSlice(slice* dims)
| Index(expr value)
boolop = And | Or
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs, int is_async)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset)
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
attributes (int lineno, int col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
keyword = (identifier? arg, expr value)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
}
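
Python.asdl is the machine-readable source of CPython's AST: asdl_c.py (further down in this commit) generates the C node definitions from it. A quick way to see these constructors in the flesh on a stock CPython 3.6 is the stdlib ast module, whose classes correspond one-to-one to the constructors above:

    import ast

    # "x = 1 + 2" exercises Module, Assign, Name, BinOp, Num and Add, all
    # declared in the stmt/expr sums above.
    tree = ast.parse("x = 1 + 2")
    print(ast.dump(tree))
    # Module(body=[Assign(targets=[Name(id='x', ctx=Store())],
    #        value=BinOp(left=Num(n=1), op=Add(), right=Num(n=2)))])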

third_party/python/Parser/acceler.c vendored Normal file

@@ -0,0 +1,125 @@
/* Parser accelerator module */
/* The parser as originally conceived had disappointing performance.
This module does some precomputation that speeds up the selection
of a DFA based upon a token, turning a search through an array
into a simple indexing operation. The parser now cannot work
without the accelerators installed. Note that the accelerators
are installed dynamically when the parser is initialized; they
are not part of the static data structure written on graminit.[ch]
by the parser generator. */
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "token.h"
#include "parser.h"
/* Forward references */
static void fixdfa(grammar *, dfa *);
static void fixstate(grammar *, state *);
void
PyGrammar_AddAccelerators(grammar *g)
{
dfa *d;
int i;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fixdfa(g, d);
g->g_accel = 1;
}
void
PyGrammar_RemoveAccelerators(grammar *g)
{
dfa *d;
int i;
g->g_accel = 0;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++) {
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
if (s->s_accel)
PyObject_FREE(s->s_accel);
s->s_accel = NULL;
}
}
}
static void
fixdfa(grammar *g, dfa *d)
{
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fixstate(g, s);
}
static void
fixstate(grammar *g, state *s)
{
arc *a;
int k;
int *accel;
int nl = g->g_ll.ll_nlabels;
s->s_accept = 0;
accel = (int *) PyObject_MALLOC(nl * sizeof(int));
if (accel == NULL) {
fprintf(stderr, "no mem to build parser accelerators\n");
exit(1);
}
for (k = 0; k < nl; k++)
accel[k] = -1;
a = s->s_arc;
for (k = s->s_narcs; --k >= 0; a++) {
int lbl = a->a_lbl;
label *l = &g->g_ll.ll_label[lbl];
int type = l->lb_type;
if (a->a_arrow >= (1 << 7)) {
printf("XXX too many states!\n");
continue;
}
if (ISNONTERMINAL(type)) {
dfa *d1 = PyGrammar_FindDFA(g, type);
int ibit;
if (type - NT_OFFSET >= (1 << 7)) {
printf("XXX too high nonterminal number!\n");
continue;
}
for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
if (testbit(d1->d_first, ibit)) {
if (accel[ibit] != -1)
printf("XXX ambiguity!\n");
accel[ibit] = a->a_arrow | (1 << 7) |
((type - NT_OFFSET) << 8);
}
}
}
else if (lbl == EMPTY)
s->s_accept = 1;
else if (lbl >= 0 && lbl < nl)
accel[lbl] = a->a_arrow;
}
while (nl > 0 && accel[nl-1] == -1)
nl--;
for (k = 0; k < nl && accel[k] == -1;)
k++;
if (k < nl) {
int i;
s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
if (s->s_accel == NULL) {
fprintf(stderr, "no mem to add parser accelerators\n");
exit(1);
}
s->s_lower = k;
s->s_upper = nl;
for (i = 0; k < nl; i++, k++)
s->s_accel[i] = accel[k];
}
PyObject_FREE(accel);
}
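
The packed entries that fixstate() builds are consumed by PyParser_AddToken() in parser.c further down. A minimal sketch of the encoding in Python rather than C (decode_accel is an illustrative name, not a CPython function; NT_OFFSET is 256 in token.h):

    def decode_accel(x):
        # An entry is -1 (no transition) or: bits 0-6 = target state,
        # bit 7 = "push a nonterminal first", bits 8+ = nonterminal index.
        if x == -1:
            return None
        arrow = x & ((1 << 7) - 1)
        if x & (1 << 7):
            return ("push", (x >> 8) + 256, arrow)   # 256 == NT_OFFSET
        return ("shift", arrow)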

third_party/python/Parser/asdl.py vendored Normal file

@@ -0,0 +1,376 @@
#-------------------------------------------------------------------------------
# Parser for ASDL [1] definition files. Reads in an ASDL description and parses
# it into an AST that describes it.
#
# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
# modules and attributes after a product. Words starting with Capital letters
# are terminals. Literal tokens are in "double quotes". Others are
# non-terminals. Id is either TokenId or ConstructorId.
#
# module ::= "module" Id "{" [definitions] "}"
# definitions ::= { TypeId "=" type }
# type ::= product | sum
# product ::= fields ["attributes" fields]
# fields ::= "(" { field, "," } field ")"
# field ::= TypeId ["?" | "*"] [Id]
# sum ::= constructor { "|" constructor } ["attributes" fields]
# constructor ::= ConstructorId [fields]
#
# [1] "The Zephyr Abstract Syntax Description Language" by Wang, et al. See
# http://asdl.sourceforge.net/
#-------------------------------------------------------------------------------
from collections import namedtuple
import re
__all__ = [
'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
# The following classes define nodes into which the ASDL description is parsed.
# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
# structure used by a programming language. But ASDL files themselves need to be
# parsed. This module parses ASDL files and uses a simple AST to represent them.
# See the EBNF at the top of the file to understand the logical connection
# between the various node types.
builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
'constant'}
class AST:
def __repr__(self):
raise NotImplementedError
class Module(AST):
def __init__(self, name, dfns):
self.name = name
self.dfns = dfns
self.types = {type.name: type.value for type in dfns}
def __repr__(self):
return 'Module({0.name}, {0.dfns})'.format(self)
class Type(AST):
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return 'Type({0.name}, {0.value})'.format(self)
class Constructor(AST):
def __init__(self, name, fields=None):
self.name = name
self.fields = fields or []
def __repr__(self):
return 'Constructor({0.name}, {0.fields})'.format(self)
class Field(AST):
def __init__(self, type, name=None, seq=False, opt=False):
self.type = type
self.name = name
self.seq = seq
self.opt = opt
def __repr__(self):
if self.seq:
extra = ", seq=True"
elif self.opt:
extra = ", opt=True"
else:
extra = ""
if self.name is None:
return 'Field({0.type}{1})'.format(self, extra)
else:
return 'Field({0.type}, {0.name}{1})'.format(self, extra)
class Sum(AST):
def __init__(self, types, attributes=None):
self.types = types
self.attributes = attributes or []
def __repr__(self):
if self.attributes:
return 'Sum({0.types}, {0.attributes})'.format(self)
else:
return 'Sum({0.types})'.format(self)
class Product(AST):
def __init__(self, fields, attributes=None):
self.fields = fields
self.attributes = attributes or []
def __repr__(self):
if self.attributes:
return 'Product({0.fields}, {0.attributes})'.format(self)
else:
return 'Product({0.fields})'.format(self)
# A generic visitor for the meta-AST that describes ASDL. This can be used by
# emitters. Note that this visitor does not provide a generic visit method, so a
# subclass needs to define visit methods from visitModule to as deep as the
# interesting node.
# We also define a Check visitor that makes sure the parsed ASDL is well-formed.
class VisitorBase(object):
"""Generic tree visitor for ASTs."""
def __init__(self):
self.cache = {}
def visit(self, obj, *args):
klass = obj.__class__
meth = self.cache.get(klass)
if meth is None:
methname = "visit" + klass.__name__
meth = getattr(self, methname, None)
self.cache[klass] = meth
if meth:
try:
meth(obj, *args)
except Exception as e:
print("Error visiting %r: %s" % (obj, e))
raise
class Check(VisitorBase):
"""A visitor that checks a parsed ASDL tree for correctness.
Errors are printed and accumulated.
"""
def __init__(self):
super(Check, self).__init__()
self.cons = {}
self.errors = 0
self.types = {}
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, str(type.name))
def visitSum(self, sum, name):
for t in sum.types:
self.visit(t, name)
def visitConstructor(self, cons, name):
key = str(cons.name)
conflict = self.cons.get(key)
if conflict is None:
self.cons[key] = name
else:
print('Redefinition of constructor {}'.format(key))
print('Defined in {} and {}'.format(conflict, name))
self.errors += 1
for f in cons.fields:
self.visit(f, key)
def visitField(self, field, name):
key = str(field.type)
l = self.types.setdefault(key, [])
l.append(name)
def visitProduct(self, prod, name):
for f in prod.fields:
self.visit(f, name)
def check(mod):
"""Check the parsed ASDL tree for correctness.
Return True if success. For failure, the errors are printed out and False
is returned.
"""
v = Check()
v.visit(mod)
for t in v.types:
if t not in mod.types and t not in builtin_types:
v.errors += 1
uses = ", ".join(v.types[t])
print('Undefined type {}, used in {}'.format(t, uses))
return not v.errors
# The ASDL parser itself comes next. The only interesting external interface
# here is the top-level parse function.
def parse(filename):
"""Parse ASDL from the given file and return a Module node describing it."""
with open(filename) as f:
parser = ASDLParser()
return parser.parse(f.read())
# Types for describing tokens in an ASDL specification.
class TokenKind:
"""TokenKind is provides a scope for enumerated token kinds."""
(ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk,
LParen, RParen, LBrace, RBrace) = range(11)
operator_table = {
'=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen,
')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace}
Token = namedtuple('Token', 'kind value lineno')
class ASDLSyntaxError(Exception):
def __init__(self, msg, lineno=None):
self.msg = msg
self.lineno = lineno or '<unknown>'
def __str__(self):
return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
def tokenize_asdl(buf):
"""Tokenize the given buffer. Yield Token objects."""
for lineno, line in enumerate(buf.splitlines(), 1):
for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()):
c = m.group(1)
if c[0].isalpha():
# Some kind of identifier
if c[0].isupper():
yield Token(TokenKind.ConstructorId, c, lineno)
else:
yield Token(TokenKind.TypeId, c, lineno)
elif c[:2] == '--':
# Comment
break
else:
# Operators
try:
op_kind = TokenKind.operator_table[c]
except KeyError:
raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
yield Token(op_kind, c, lineno)
class ASDLParser:
"""Parser for ASDL files.
Create, then call the parse method on a buffer containing ASDL.
This is a simple recursive descent parser that uses tokenize_asdl for the
lexing.
"""
def __init__(self):
self._tokenizer = None
self.cur_token = None
def parse(self, buf):
"""Parse the ASDL in the buffer and return an AST with a Module root.
"""
self._tokenizer = tokenize_asdl(buf)
self._advance()
return self._parse_module()
def _parse_module(self):
if self._at_keyword('module'):
self._advance()
else:
raise ASDLSyntaxError(
'Expected "module" (found {})'.format(self.cur_token.value),
self.cur_token.lineno)
name = self._match(self._id_kinds)
self._match(TokenKind.LBrace)
defs = self._parse_definitions()
self._match(TokenKind.RBrace)
return Module(name, defs)
def _parse_definitions(self):
defs = []
while self.cur_token.kind == TokenKind.TypeId:
typename = self._advance()
self._match(TokenKind.Equals)
type = self._parse_type()
defs.append(Type(typename, type))
return defs
def _parse_type(self):
if self.cur_token.kind == TokenKind.LParen:
# If we see a (, it's a product
return self._parse_product()
else:
# Otherwise it's a sum. Look for ConstructorId
sumlist = [Constructor(self._match(TokenKind.ConstructorId),
self._parse_optional_fields())]
while self.cur_token.kind == TokenKind.Pipe:
# More constructors
self._advance()
sumlist.append(Constructor(
self._match(TokenKind.ConstructorId),
self._parse_optional_fields()))
return Sum(sumlist, self._parse_optional_attributes())
def _parse_product(self):
return Product(self._parse_fields(), self._parse_optional_attributes())
def _parse_fields(self):
fields = []
self._match(TokenKind.LParen)
while self.cur_token.kind == TokenKind.TypeId:
typename = self._advance()
is_seq, is_opt = self._parse_optional_field_quantifier()
id = (self._advance() if self.cur_token.kind in self._id_kinds
else None)
fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
if self.cur_token.kind == TokenKind.RParen:
break
elif self.cur_token.kind == TokenKind.Comma:
self._advance()
self._match(TokenKind.RParen)
return fields
def _parse_optional_fields(self):
if self.cur_token.kind == TokenKind.LParen:
return self._parse_fields()
else:
return None
def _parse_optional_attributes(self):
if self._at_keyword('attributes'):
self._advance()
return self._parse_fields()
else:
return None
def _parse_optional_field_quantifier(self):
is_seq, is_opt = False, False
if self.cur_token.kind == TokenKind.Asterisk:
is_seq = True
self._advance()
elif self.cur_token.kind == TokenKind.Question:
is_opt = True
self._advance()
return is_seq, is_opt
def _advance(self):
""" Return the value of the current token and read the next one into
self.cur_token.
"""
cur_val = None if self.cur_token is None else self.cur_token.value
try:
self.cur_token = next(self._tokenizer)
except StopIteration:
self.cur_token = None
return cur_val
_id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
def _match(self, kind):
"""The 'match' primitive of RD parsers.
* Verifies that the current token is of the given kind (kind can
be a tuple, in which case the kind must match one of its members).
* Returns the value of the current token
* Reads in the next token
"""
if (isinstance(kind, tuple) and self.cur_token.kind in kind or
self.cur_token.kind == kind
):
value = self.cur_token.value
self._advance()
return value
else:
raise ASDLSyntaxError(
'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
self.cur_token.lineno)
def _at_keyword(self, keyword):
return (self.cur_token.kind == TokenKind.TypeId and
self.cur_token.value == keyword)
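
The module's public entry point is parse(filename), but ASDLParser can be driven from a string directly, which makes for a quick smoke test. A usage sketch with a made-up one-rule description:

    buf = """module Toy
    {
        expr = Num(int n) | Neg(expr operand)
    }"""
    mod = ASDLParser().parse(buf)
    print(mod)         # Module(Toy, [Type(expr, Sum([Constructor(Num, ...), ...]))])
    print(check(mod))  # True: 'int' is builtin, 'expr' is defined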

third_party/python/Parser/asdl_c.py vendored Normal file

File diff suppressed because it is too large

third_party/python/Parser/bitset.c vendored Normal file

@@ -0,0 +1,66 @@
/* Bitset primitives used by the parser generator */
#include "pgenheaders.h"
#include "bitset.h"
bitset
newbitset(int nbits)
{
int nbytes = NBYTES(nbits);
bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
if (ss == NULL)
Py_FatalError("no mem for bitset");
ss += nbytes;
while (--nbytes >= 0)
*--ss = 0;
return ss;
}
void
delbitset(bitset ss)
{
PyObject_FREE(ss);
}
int
addbit(bitset ss, int ibit)
{
int ibyte = BIT2BYTE(ibit);
BYTE mask = BIT2MASK(ibit);
if (ss[ibyte] & mask)
return 0; /* Bit already set */
ss[ibyte] |= mask;
return 1;
}
#if 0 /* Now a macro */
int
testbit(bitset ss, int ibit)
{
return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
if (*ss1++ != *ss2++)
return 0;
return 1;
}
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
*ss1++ |= *ss2++;
}
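
The BYTE, NBYTES, BIT2BYTE and BIT2MASK definitions live in bitset.h; assuming the usual 8-bit BYTE, the arithmetic behind newbitset() and addbit() amounts to this Python sketch:

    def nbytes(nbits):
        return (nbits + 7) // 8                    # NBYTES()

    def addbit(ss, ibit):
        ibyte, mask = ibit >> 3, 1 << (ibit & 7)   # BIT2BYTE(), BIT2MASK()
        if ss[ibyte] & mask:
            return 0                               # bit already set
        ss[ibyte] |= mask
        return 1

    ss = bytearray(nbytes(19))                     # newbitset(19) analogue
    addbit(ss, 3); addbit(ss, 17)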

third_party/python/Parser/firstsets.c vendored Normal file

@@ -0,0 +1,113 @@
/* Computation of FIRST sets */
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
extern int Py_DebugFlag;
/* Forward */
static void calcfirstset(grammar *, dfa *);
void
addfirstsets(grammar *g)
{
int i;
dfa *d;
if (Py_DebugFlag)
printf("Adding FIRST sets ...\n");
for (i = 0; i < g->g_ndfas; i++) {
d = &g->g_dfa[i];
if (d->d_first == NULL)
calcfirstset(g, d);
}
}
static void
calcfirstset(grammar *g, dfa *d)
{
int i, j;
state *s;
arc *a;
int nsyms;
int *sym;
int nbits;
static bitset dummy;
bitset result;
int type;
dfa *d1;
label *l0;
if (Py_DebugFlag)
printf("Calculate FIRST set for '%s'\n", d->d_name);
if (dummy == NULL)
dummy = newbitset(1);
if (d->d_first == dummy) {
fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
return;
}
if (d->d_first != NULL) {
fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
d->d_name);
}
d->d_first = dummy;
l0 = g->g_ll.ll_label;
nbits = g->g_ll.ll_nlabels;
result = newbitset(nbits);
sym = (int *)PyObject_MALLOC(sizeof(int));
if (sym == NULL)
Py_FatalError("no mem for new sym in calcfirstset");
nsyms = 1;
sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
s = &d->d_state[d->d_initial];
for (i = 0; i < s->s_narcs; i++) {
a = &s->s_arc[i];
for (j = 0; j < nsyms; j++) {
if (sym[j] == a->a_lbl)
break;
}
if (j >= nsyms) { /* New label */
sym = (int *)PyObject_REALLOC(sym,
sizeof(int) * (nsyms + 1));
if (sym == NULL)
Py_FatalError(
"no mem to resize sym in calcfirstset");
sym[nsyms++] = a->a_lbl;
type = l0[a->a_lbl].lb_type;
if (ISNONTERMINAL(type)) {
d1 = PyGrammar_FindDFA(g, type);
if (d1->d_first == dummy) {
fprintf(stderr,
"Left-recursion below '%s'\n",
d->d_name);
}
else {
if (d1->d_first == NULL)
calcfirstset(g, d1);
mergebitset(result,
d1->d_first, nbits);
}
}
else if (ISTERMINAL(type)) {
addbit(result, a->a_lbl);
}
}
}
d->d_first = result;
if (Py_DebugFlag) {
printf("FIRST set for '%s': {", d->d_name);
for (i = 0; i < nbits; i++) {
if (testbit(result, i))
printf(" %s", PyGrammar_LabelRepr(&l0[i]));
}
printf(" }\n");
}
PyObject_FREE(sym);
}
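
calcfirstset() computes the classic FIRST relation (the set of tokens that can begin a given nonterminal), with the dummy bitset standing in while a rule is in progress so that left recursion is reported instead of looping. A minimal sketch over a toy grammar, with illustrative names (uppercase strings are terminals; the real code walks DFA arcs and merges bitsets instead):

    GRAMMAR = {"expr": [["term", "OP", "term"]],
               "term": [["CONSTANT"], ["LPAR", "expr", "RPAR"]]}
    DUMMY, first = object(), {}

    def calc_first(rule):
        if first.get(rule) is DUMMY:
            raise ValueError("left-recursion for %r" % rule)  # d_first == dummy
        if rule in first:
            return first[rule]
        first[rule] = DUMMY
        result = set()
        for alt in GRAMMAR[rule]:
            sym = alt[0]
            result |= calc_first(sym) if sym in GRAMMAR else {sym}
        first[rule] = result
        return result

    print(calc_first("expr"))   # {'CONSTANT', 'LPAR'} (set order may vary)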

third_party/python/Parser/grammar.c vendored Normal file

@@ -0,0 +1,273 @@
/* Grammar implementation */
#include "Python.h"
#include "pgenheaders.h"
#include <ctype.h>
#include "token.h"
#include "grammar.h"
extern int Py_DebugFlag;
grammar *
newgrammar(int start)
{
grammar *g;
g = (grammar *)PyObject_MALLOC(sizeof(grammar));
if (g == NULL)
Py_FatalError("no mem for new grammar");
g->g_ndfas = 0;
g->g_dfa = NULL;
g->g_start = start;
g->g_ll.ll_nlabels = 0;
g->g_ll.ll_label = NULL;
g->g_accel = 0;
return g;
}
void
freegrammar(grammar *g)
{
int i;
for (i = 0; i < g->g_ndfas; i++) {
free(g->g_dfa[i].d_name);
for (int j = 0; j < g->g_dfa[i].d_nstates; j++)
PyObject_FREE(g->g_dfa[i].d_state[j].s_arc);
PyObject_FREE(g->g_dfa[i].d_state);
}
PyObject_FREE(g->g_dfa);
for (i = 0; i < g->g_ll.ll_nlabels; i++)
free(g->g_ll.ll_label[i].lb_str);
PyObject_FREE(g->g_ll.ll_label);
PyObject_FREE(g);
}
dfa *
adddfa(grammar *g, int type, const char *name)
{
dfa *d;
g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
sizeof(dfa) * (g->g_ndfas + 1));
if (g->g_dfa == NULL)
Py_FatalError("no mem to resize dfa in adddfa");
d = &g->g_dfa[g->g_ndfas++];
d->d_type = type;
d->d_name = strdup(name);
d->d_nstates = 0;
d->d_state = NULL;
d->d_initial = -1;
d->d_first = NULL;
return d; /* Only use while fresh! */
}
int
addstate(dfa *d)
{
state *s;
d->d_state = (state *)PyObject_REALLOC(d->d_state,
sizeof(state) * (d->d_nstates + 1));
if (d->d_state == NULL)
Py_FatalError("no mem to resize state in addstate");
s = &d->d_state[d->d_nstates++];
s->s_narcs = 0;
s->s_arc = NULL;
s->s_lower = 0;
s->s_upper = 0;
s->s_accel = NULL;
s->s_accept = 0;
return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
}
void
addarc(dfa *d, int from, int to, int lbl)
{
state *s;
arc *a;
assert(0 <= from && from < d->d_nstates);
assert(0 <= to && to < d->d_nstates);
s = &d->d_state[from];
s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
if (s->s_arc == NULL)
Py_FatalError("no mem to resize arc list in addarc");
a = &s->s_arc[s->s_narcs++];
a->a_lbl = lbl;
a->a_arrow = to;
}
int
addlabel(labellist *ll, int type, const char *str)
{
int i;
label *lb;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type &&
strcmp(ll->ll_label[i].lb_str, str) == 0)
return i;
}
ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
sizeof(label) * (ll->ll_nlabels + 1));
if (ll->ll_label == NULL)
Py_FatalError("no mem to resize labellist in addlabel");
lb = &ll->ll_label[ll->ll_nlabels++];
lb->lb_type = type;
lb->lb_str = strdup(str);
if (Py_DebugFlag)
printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
PyGrammar_LabelRepr(lb));
return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
}
/* Same as addlabel(), but dies rather than adding */
int
findlabel(labellist *ll, int type, const char *str)
{
int i;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type /*&&
strcmp(ll->ll_label[i].lb_str, str) == 0*/)
return i;
}
fprintf(stderr, "Label %d/'%s' not found\n", type, str);
Py_FatalError("grammar.c:findlabel()");
/* Py_FatalError() is declared with __attribute__((__noreturn__)).
GCC emits a warning without "return 0;" (compiler bug!), but Clang is
smarter and emits a warning on the return... */
#ifndef __clang__
return 0; /* Make gcc -Wall happy */
#endif
}
/* Forward */
static void translabel(grammar *, label *);
void
translatelabels(grammar *g)
{
int i;
#ifdef Py_DEBUG
printf("Translating labels ...\n");
#endif
/* Don't translate EMPTY */
for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
translabel(g, &g->g_ll.ll_label[i]);
}
static void
translabel(grammar *g, label *lb)
{
int i;
if (Py_DebugFlag)
printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
if (lb->lb_type == NAME) {
for (i = 0; i < g->g_ndfas; i++) {
if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
if (Py_DebugFlag)
printf(
"Label %s is non-terminal %d.\n",
lb->lb_str,
g->g_dfa[i].d_type);
lb->lb_type = g->g_dfa[i].d_type;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
for (i = 0; i < (int)N_TOKENS; i++) {
if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
if (Py_DebugFlag)
printf("Label %s is terminal %d.\n",
lb->lb_str, i);
lb->lb_type = i;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
printf("Can't translate NAME label '%s'\n", lb->lb_str);
return;
}
if (lb->lb_type == STRING) {
if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
lb->lb_str[1] == '_') {
char *p;
char *src;
char *dest;
size_t name_len;
if (Py_DebugFlag)
printf("Label %s is a keyword\n", lb->lb_str);
lb->lb_type = NAME;
src = lb->lb_str + 1;
p = strchr(src, '\'');
if (p)
name_len = p - src;
else
name_len = strlen(src);
dest = (char *)malloc(name_len + 1);
if (!dest) {
printf("Can't alloc dest '%s'\n", src);
return;
}
strncpy(dest, src, name_len);
dest[name_len] = '\0';
free(lb->lb_str);
lb->lb_str = dest;
}
else if (lb->lb_str[2] == lb->lb_str[0]) {
int type = (int) PyToken_OneChar(lb->lb_str[1]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
int type = (int) PyToken_TwoChars(lb->lb_str[1],
lb->lb_str[2]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
int type = (int) PyToken_ThreeChars(lb->lb_str[1],
lb->lb_str[2],
lb->lb_str[3]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else
printf("Can't translate STRING label %s\n",
lb->lb_str);
}
else
printf("Can't translate label '%s'\n",
PyGrammar_LabelRepr(lb));
}
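
Most of translabel() is devoted to quoted strings from the Grammar file. A sketch of the classification it performs (label_kind is an illustrative name; the C code resolves operators through PyToken_OneChar/TwoChars/ThreeChars, which this sketch only alludes to):

    def label_kind(s):                   # s is e.g. "'while'" or "'<='"
        body = s.strip("'")
        if body[0].isalpha() or body[0] == "_":
            return ("keyword", body)     # lb_type becomes NAME
        return ("operator", body)        # length picks One/Two/ThreeChars

    print(label_kind("'while'"), label_kind("'<='"))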

third_party/python/Parser/grammar1.c vendored Normal file

@@ -0,0 +1,61 @@
/* Grammar subroutines needed by parser */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
/* Return the DFA for the given type */
dfa *
PyGrammar_FindDFA(grammar *g, int type)
{
dfa *d;
#if 1
/* Massive speed-up */
d = &g->g_dfa[type - NT_OFFSET];
assert(d->d_type == type);
return d;
#else
/* Old, slow version */
int i;
for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
if (d->d_type == type)
return d;
}
assert(0);
/* NOTREACHED */
#endif
}
const char *
PyGrammar_LabelRepr(label *lb)
{
static char buf[100];
if (lb->lb_type == ENDMARKER)
return "EMPTY";
else if (ISNONTERMINAL(lb->lb_type)) {
if (lb->lb_str == NULL) {
PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
return buf;
}
else
return lb->lb_str;
}
else if (lb->lb_type < N_TOKENS) {
if (lb->lb_str == NULL)
return _PyParser_TokenNames[lb->lb_type];
else {
PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
_PyParser_TokenNames[lb->lb_type], lb->lb_str);
return buf;
}
}
else {
Py_FatalError("invalid label");
return NULL;
}
}

third_party/python/Parser/listnode.c vendored Normal file

@@ -0,0 +1,66 @@
/* List a node on a file */
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
/* Forward */
static void list1node(FILE *, node *);
static void listnode(FILE *, node *);
void
PyNode_ListTree(node *n)
{
listnode(stdout, n);
}
static int level, atbol;
static void
listnode(FILE *fp, node *n)
{
level = 0;
atbol = 1;
list1node(fp, n);
}
static void
list1node(FILE *fp, node *n)
{
if (n == 0)
return;
if (ISNONTERMINAL(TYPE(n))) {
int i;
for (i = 0; i < NCH(n); i++)
list1node(fp, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
switch (TYPE(n)) {
case INDENT:
++level;
break;
case DEDENT:
--level;
break;
default:
if (atbol) {
int i;
for (i = 0; i < level; ++i)
fprintf(fp, "\t");
atbol = 0;
}
if (TYPE(n) == NEWLINE) {
if (STR(n) != NULL)
fprintf(fp, "%s", STR(n));
fprintf(fp, "\n");
atbol = 1;
}
else
fprintf(fp, "%s ", STR(n));
break;
}
}
else
fprintf(fp, "? ");
}

third_party/python/Parser/metagrammar.c vendored Normal file

@@ -0,0 +1,159 @@
#include "pgenheaders.h"
#include "metagrammar.h"
#include "grammar.h"
#include "pgen.h"
static arc arcs_0_0[3] = {
{2, 0},
{3, 0},
{4, 1},
};
static arc arcs_0_1[1] = {
{0, 1},
};
static state states_0[2] = {
{3, arcs_0_0},
{1, arcs_0_1},
};
static arc arcs_1_0[1] = {
{5, 1},
};
static arc arcs_1_1[1] = {
{6, 2},
};
static arc arcs_1_2[1] = {
{7, 3},
};
static arc arcs_1_3[1] = {
{3, 4},
};
static arc arcs_1_4[1] = {
{0, 4},
};
static state states_1[5] = {
{1, arcs_1_0},
{1, arcs_1_1},
{1, arcs_1_2},
{1, arcs_1_3},
{1, arcs_1_4},
};
static arc arcs_2_0[1] = {
{8, 1},
};
static arc arcs_2_1[2] = {
{9, 0},
{0, 1},
};
static state states_2[2] = {
{1, arcs_2_0},
{2, arcs_2_1},
};
static arc arcs_3_0[1] = {
{10, 1},
};
static arc arcs_3_1[2] = {
{10, 1},
{0, 1},
};
static state states_3[2] = {
{1, arcs_3_0},
{2, arcs_3_1},
};
static arc arcs_4_0[2] = {
{11, 1},
{13, 2},
};
static arc arcs_4_1[1] = {
{7, 3},
};
static arc arcs_4_2[3] = {
{14, 4},
{15, 4},
{0, 2},
};
static arc arcs_4_3[1] = {
{12, 4},
};
static arc arcs_4_4[1] = {
{0, 4},
};
static state states_4[5] = {
{2, arcs_4_0},
{1, arcs_4_1},
{3, arcs_4_2},
{1, arcs_4_3},
{1, arcs_4_4},
};
static arc arcs_5_0[3] = {
{5, 1},
{16, 1},
{17, 2},
};
static arc arcs_5_1[1] = {
{0, 1},
};
static arc arcs_5_2[1] = {
{7, 3},
};
static arc arcs_5_3[1] = {
{18, 1},
};
static state states_5[4] = {
{3, arcs_5_0},
{1, arcs_5_1},
{1, arcs_5_2},
{1, arcs_5_3},
};
static dfa dfas[6] = {
{256, "MSTART", 0, 2, states_0,
"\070\000\000"},
{257, "RULE", 0, 5, states_1,
"\040\000\000"},
{258, "RHS", 0, 2, states_2,
"\040\010\003"},
{259, "ALT", 0, 2, states_3,
"\040\010\003"},
{260, "ITEM", 0, 5, states_4,
"\040\010\003"},
{261, "ATOM", 0, 4, states_5,
"\040\000\003"},
};
static label labels[19] = {
{0, "EMPTY"},
{256, 0},
{257, 0},
{4, 0},
{0, 0},
{1, 0},
{11, 0},
{258, 0},
{259, 0},
{18, 0},
{260, 0},
{9, 0},
{10, 0},
{261, 0},
{16, 0},
{14, 0},
{3, 0},
{7, 0},
{8, 0},
};
static grammar _PyParser_Grammar = {
6,
dfas,
{19, labels},
256
};
grammar *
meta_grammar(void)
{
return &_PyParser_Grammar;
}
grammar *
Py_meta_grammar(void)
{
return meta_grammar();
}
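
These tables are a hand-inlined instance of the structures from grammar.h: each arc pair is {label index, target state}, label index 0 is EMPTY (accept), and the octal strings in dfas[] are FIRST-set bitmaps. A sketch of reading states_2 (the RHS rule) as data; accepts() is illustrative, not a CPython function:

    # states_2 above: state 0 takes ALT (label 8) to state 1; state 1 loops
    # back on '|' (label 9, whose type 18 is VBAR) and accepts on EMPTY.
    states_2 = [[(8, 1)],
                [(9, 0), (0, 1)]]

    def accepts(states, labels):
        s = 0
        for lbl in labels:
            for a_lbl, a_arrow in states[s]:
                if a_lbl == lbl:
                    s = a_arrow
                    break
            else:
                return False
        return any(a_lbl == 0 for a_lbl, _ in states[s])

    print(accepts(states_2, [8, 9, 8]))   # ALT '|' ALT -> True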

third_party/python/Parser/myreadline.c vendored Normal file

@@ -0,0 +1,399 @@
/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
my_readline() has a different return value from GNU readline():
- NULL if an interrupt occurred or if an error occurred
- a malloc'ed empty string if EOF was read
- a malloc'ed string ending in \n normally
*/
#include "Python.h"
#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
#endif /* MS_WINDOWS */
PyThreadState* _PyOS_ReadlineTState;
#ifdef WITH_THREAD
#include "pythread.h"
static PyThread_type_lock _PyOS_ReadlineLock = NULL;
#endif
int (*PyOS_InputHook)(void) = NULL;
/* This function restarts fgets() after an EINTR error occurs,
unless PyOS_InterruptOccurred() returns true. */
static int
my_fgets(char *buf, int len, FILE *fp)
{
#ifdef MS_WINDOWS
HANDLE hInterruptEvent;
#endif
char *p;
int err;
while (1) {
if (PyOS_InputHook != NULL)
(void)(PyOS_InputHook)();
errno = 0;
clearerr(fp);
p = fgets(buf, len, fp);
if (p != NULL)
return 0; /* No error */
err = errno;
#ifdef MS_WINDOWS
/* Ctrl-C anywhere on the line or Ctrl-Z if the only character
on a line will set ERROR_OPERATION_ABORTED. Under normal
circumstances Ctrl-C will also have caused the SIGINT handler
to fire which will have set the event object returned by
_PyOS_SigintEvent. This signal fires in another thread and
is not guaranteed to have occurred before this point in the
code.
Therefore: check whether the event is set with a small timeout.
If it is, assume this is a Ctrl-C and reset the event. If it
isn't set assume that this is a Ctrl-Z on its own and drop
through to check for EOF.
*/
if (GetLastError()==ERROR_OPERATION_ABORTED) {
hInterruptEvent = _PyOS_SigintEvent();
switch (WaitForSingleObjectEx(hInterruptEvent, 10, FALSE)) {
case WAIT_OBJECT_0:
ResetEvent(hInterruptEvent);
return 1; /* Interrupt */
case WAIT_FAILED:
return -2; /* Error */
}
}
#endif /* MS_WINDOWS */
if (feof(fp)) {
clearerr(fp);
return -1; /* EOF */
}
#ifdef EINTR
if (err == EINTR) {
int s;
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
return 1;
/* try again */
continue;
}
#endif
if (PyOS_InterruptOccurred()) {
return 1; /* Interrupt */
}
return -2; /* Error */
}
/* NOTREACHED */
}
#ifdef MS_WINDOWS
/* Readline implementation using ReadConsoleW */
extern char _get_console_type(HANDLE handle);
char *
_PyOS_WindowsConsoleReadline(HANDLE hStdIn)
{
static wchar_t wbuf_local[1024 * 16];
const DWORD chunk_size = 1024;
DWORD n_read, total_read, wbuflen, u8len;
wchar_t *wbuf;
char *buf = NULL;
int err = 0;
n_read = (DWORD)-1;
total_read = 0;
wbuf = wbuf_local;
wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1;
while (1) {
if (PyOS_InputHook != NULL) {
(void)(PyOS_InputHook)();
}
if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) {
err = GetLastError();
goto exit;
}
if (n_read == (DWORD)-1 && (err = GetLastError()) == ERROR_OPERATION_ABORTED) {
break;
}
if (n_read == 0) {
int s;
err = GetLastError();
if (err != ERROR_OPERATION_ABORTED)
goto exit;
err = 0;
HANDLE hInterruptEvent = _PyOS_SigintEvent();
if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE)
== WAIT_OBJECT_0) {
ResetEvent(hInterruptEvent);
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
goto exit;
}
break;
}
total_read += n_read;
if (total_read == 0 || wbuf[total_read - 1] == L'\n') {
break;
}
wbuflen += chunk_size;
if (wbuf == wbuf_local) {
wbuf[total_read] = '\0';
wbuf = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t));
if (wbuf)
wcscpy_s(wbuf, wbuflen, wbuf_local);
else {
PyErr_NoMemory();
goto exit;
}
}
else {
wchar_t *tmp = PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t));
if (tmp == NULL) {
PyErr_NoMemory();
goto exit;
}
wbuf = tmp;
}
}
if (wbuf[0] == '\x1a') {
buf = PyMem_RawMalloc(1);
if (buf)
buf[0] = '\0';
else {
PyErr_NoMemory();
}
goto exit;
}
u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL);
buf = PyMem_RawMalloc(u8len + 1);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL);
buf[u8len] = '\0';
exit:
if (wbuf != wbuf_local)
PyMem_RawFree(wbuf);
if (err) {
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
PyErr_SetFromWindowsErr(err);
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
}
return buf;
}
#endif
/* Readline implementation using fgets() */
char *
PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
size_t n;
char *p, *pr;
#ifdef MS_WINDOWS
if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) {
HANDLE hStdIn, hStdErr;
_Py_BEGIN_SUPPRESS_IPH
hStdIn = (HANDLE)_get_osfhandle(fileno(sys_stdin));
hStdErr = (HANDLE)_get_osfhandle(fileno(stderr));
_Py_END_SUPPRESS_IPH
if (_get_console_type(hStdIn) == 'r') {
fflush(sys_stdout);
if (prompt) {
if (_get_console_type(hStdErr) == 'w') {
wchar_t *wbuf;
int wlen;
wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1,
NULL, 0);
if (wlen) {
wbuf = PyMem_RawMalloc(wlen * sizeof(wchar_t));
if (wbuf == NULL) {
PyErr_NoMemory();
return NULL;
}
wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1,
wbuf, wlen);
if (wlen) {
DWORD n;
fflush(stderr);
/* wlen includes null terminator, so subtract 1 */
WriteConsoleW(hStdErr, wbuf, wlen - 1, &n, NULL);
}
PyMem_RawFree(wbuf);
}
} else {
fprintf(stderr, "%s", prompt);
fflush(stderr);
}
}
clearerr(sys_stdin);
return _PyOS_WindowsConsoleReadline(hStdIn);
}
}
#endif
n = 100;
p = (char *)PyMem_RawMalloc(n);
if (p == NULL) {
PyErr_NoMemory();
return NULL;
}
fflush(sys_stdout);
if (prompt)
fprintf(stderr, "%s", prompt);
fflush(stderr);
switch (my_fgets(p, (int)n, sys_stdin)) {
case 0: /* Normal case */
break;
case 1: /* Interrupt */
PyMem_RawFree(p);
return NULL;
case -1: /* EOF */
case -2: /* Error */
default: /* Shouldn't happen */
*p = '\0';
break;
}
n = strlen(p);
while (n > 0 && p[n-1] != '\n') {
size_t incr = n+2;
if (incr > INT_MAX) {
PyMem_RawFree(p);
PyErr_SetString(PyExc_OverflowError, "input line too long");
return NULL;
}
pr = (char *)PyMem_RawRealloc(p, n + incr);
if (pr == NULL) {
PyMem_RawFree(p);
PyErr_NoMemory();
return NULL;
}
p = pr;
if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
break;
n += strlen(p+n);
}
pr = (char *)PyMem_RawRealloc(p, n+1);
if (pr == NULL) {
PyMem_RawFree(p);
PyErr_NoMemory();
return NULL;
}
return pr;
}
/* By initializing this function pointer, systems embedding Python can
override the readline function.
Note: Python expects in return a buffer allocated with PyMem_Malloc. */
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *);
/* Interface used by tokenizer.c and bltinmodule.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
char *rv, *res;
size_t len;
if (_PyOS_ReadlineTState == PyThreadState_GET()) {
PyErr_SetString(PyExc_RuntimeError,
"can't re-enter readline");
return NULL;
}
if (PyOS_ReadlineFunctionPointer == NULL) {
PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
}
#ifdef WITH_THREAD
if (_PyOS_ReadlineLock == NULL) {
_PyOS_ReadlineLock = PyThread_allocate_lock();
if (_PyOS_ReadlineLock == NULL) {
PyErr_SetString(PyExc_MemoryError, "can't allocate lock");
return NULL;
}
}
#endif
_PyOS_ReadlineTState = PyThreadState_GET();
Py_BEGIN_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
#endif
/* This is needed to handle the unlikely case that the
* interpreter is in interactive mode *and* stdin/out are not
* a tty. This can happen, for example if python is run like
* this: python -i < test1.py
*/
if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
else
rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
prompt);
Py_END_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_release_lock(_PyOS_ReadlineLock);
#endif
_PyOS_ReadlineTState = NULL;
if (rv == NULL)
return NULL;
len = strlen(rv) + 1;
res = PyMem_Malloc(len);
if (res != NULL) {
memcpy(res, rv, len);
}
else {
PyErr_NoMemory();
}
PyMem_RawFree(rv);
return res;
}

third_party/python/Parser/node.c vendored Normal file

@@ -0,0 +1,164 @@
/* Parse tree node implementation */
#include "Python.h"
#include "node.h"
#include "errcode.h"
node *
PyNode_New(int type)
{
node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
if (n == NULL)
return NULL;
n->n_type = type;
n->n_str = NULL;
n->n_lineno = 0;
n->n_nchildren = 0;
n->n_child = NULL;
return n;
}
/* See comments at XXXROUNDUP below. Returns -1 on overflow. */
static int
fancy_roundup(int n)
{
/* Round up to the closest power of 2 >= n. */
int result = 256;
assert(n > 128);
while (result < n) {
result <<= 1;
if (result <= 0)
return -1;
}
return result;
}
/* A gimmick to make massive numbers of reallocs quicker. The result is
* a number >= the input. In PyNode_AddChild, it's used like so, when
* we're about to add child number current_size + 1:
*
* if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
* allocate space for XXXROUNDUP(current_size + 1) total children
* else:
* we already have enough space
*
* Since a node starts out empty, we must have
*
* XXXROUNDUP(0) < XXXROUNDUP(1)
*
* so that we allocate space for the first child. One-child nodes are very
* common (presumably that would change if we used a more abstract form
* of syntax tree), so to avoid wasting memory it's desirable that
* XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0.
*
* Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4?
* Rounding up to a multiple of an exact power of 2 is very efficient, and
* most nodes with more than one child have <= 4 kids.
*
* Else we call fancy_roundup() to grow proportionately to n. We've got an
* extreme case then (like test_longexp.py), and on many platforms doing
* anything less than proportional growth leads to exorbitant runtime
* (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
* Win98).
*
* In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
* reported that, with this scheme, 89% of PyObject_REALLOC calls in
* PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually
* wastes very little memory, but is very effective at sidestepping
* platform-realloc disasters on vulnerable platforms.
*
* Note that this would be straightforward if a node stored its current
* capacity. The code is tricky to avoid that.
*/
#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
(n) <= 128 ? (int)_Py_SIZE_ROUND_UP((n), 4) : \
fancy_roundup(n))
int
PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset)
{
const int nch = n1->n_nchildren;
int current_capacity;
int required_capacity;
node *n;
if (nch == INT_MAX || nch < 0)
return E_OVERFLOW;
current_capacity = XXXROUNDUP(nch);
required_capacity = XXXROUNDUP(nch + 1);
if (current_capacity < 0 || required_capacity < 0)
return E_OVERFLOW;
if (current_capacity < required_capacity) {
if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
return E_NOMEM;
}
n = n1->n_child;
n = (node *) PyObject_REALLOC(n,
required_capacity * sizeof(node));
if (n == NULL)
return E_NOMEM;
n1->n_child = n;
}
n = &n1->n_child[n1->n_nchildren++];
n->n_type = type;
n->n_str = str;
n->n_lineno = lineno;
n->n_col_offset = col_offset;
n->n_nchildren = 0;
n->n_child = NULL;
return 0;
}
/* Forward */
static void freechildren(node *);
static Py_ssize_t sizeofchildren(node *n);
void
PyNode_Free(node *n)
{
if (n != NULL) {
freechildren(n);
PyObject_FREE(n);
}
}
Py_ssize_t
_PyNode_SizeOf(node *n)
{
Py_ssize_t res = 0;
if (n != NULL)
res = sizeof(node) + sizeofchildren(n);
return res;
}
static void
freechildren(node *n)
{
int i;
for (i = NCH(n); --i >= 0; )
freechildren(CHILD(n, i));
if (n->n_child != NULL)
PyObject_FREE(n->n_child);
if (STR(n) != NULL)
PyObject_FREE(STR(n));
}
static Py_ssize_t
sizeofchildren(node *n)
{
Py_ssize_t res = 0;
int i;
for (i = NCH(n); --i >= 0; )
res += sizeofchildren(CHILD(n, i));
if (n->n_child != NULL)
/* allocated size of n->n_child array */
res += XXXROUNDUP(NCH(n)) * sizeof(node);
if (STR(n) != NULL)
res += strlen(STR(n)) + 1;
return res;
}
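
The growth policy described in the big comment above, restated as runnable Python with the same thresholds (xxxroundup is an illustrative name):

    def xxxroundup(n):
        if n <= 1:
            return n                # XXXROUNDUP(0) == 0, XXXROUNDUP(1) == 1
        if n <= 128:
            return (n + 3) & ~3     # _Py_SIZE_ROUND_UP(n, 4)
        result = 256                # fancy_roundup(): power of 2 >= n
        while result < n:
            result <<= 1
        return result

    assert [xxxroundup(n) for n in (0, 1, 2, 5, 128, 129)] \
        == [0, 1, 4, 8, 128, 256]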

third_party/python/Parser/parser.c vendored Normal file

@@ -0,0 +1,447 @@
/* Parser implementation */
/* For a description, see the comments at end of this file */
/* XXX To do: error recovery */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "grammar.h"
#include "node.h"
#include "parser.h"
#include "errcode.h"
#ifdef Py_DEBUG
extern int Py_DebugFlag;
#define D(x) if (!Py_DebugFlag); else x
#else
#define D(x)
#endif
/* STACK DATA TYPE */
static void s_reset(stack *);
static void
s_reset(stack *s)
{
s->s_top = &s->s_base[MAXSTACK];
}
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
static int
s_push(stack *s, dfa *d, node *parent)
{
stackentry *top;
if (s->s_top == s->s_base) {
fprintf(stderr, "s_push: parser stack overflow\n");
return E_NOMEM;
}
top = --s->s_top;
top->s_dfa = d;
top->s_parent = parent;
top->s_state = 0;
return 0;
}
#ifdef Py_DEBUG
static void
s_pop(stack *s)
{
if (s_empty(s))
Py_FatalError("s_pop: parser stack underflow -- FATAL");
s->s_top++;
}
#else /* !Py_DEBUG */
#define s_pop(s) (s)->s_top++
#endif
/* PARSER CREATION */
parser_state *
PyParser_New(grammar *g, int start)
{
parser_state *ps;
if (!g->g_accel)
PyGrammar_AddAccelerators(g);
ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
if (ps == NULL)
return NULL;
ps->p_grammar = g;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
ps->p_flags = 0;
#endif
ps->p_tree = PyNode_New(start);
if (ps->p_tree == NULL) {
PyMem_FREE(ps);
return NULL;
}
s_reset(&ps->p_stack);
(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
return ps;
}
void
PyParser_Delete(parser_state *ps)
{
/* NB If you want to save the parse tree,
you must set p_tree to NULL before calling delparser! */
PyNode_Free(ps->p_tree);
PyMem_FREE(ps);
}
/* PARSER STACK OPERATIONS */
static int
shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
int err;
assert(!s_empty(s));
err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return 0;
}
static int
push(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
int err;
node *n;
n = s->s_top->s_parent;
assert(!s_empty(s));
err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return s_push(s, d, CHILD(n, NCH(n)-1));
}
/* PARSER PROPER */
static int
classify(parser_state *ps, int type, const char *str)
{
grammar *g = ps->p_grammar;
int n = g->g_ll.ll_nlabels;
if (type == NAME) {
label *l = g->g_ll.ll_label;
int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type != NAME || l->lb_str == NULL ||
l->lb_str[0] != str[0] ||
strcmp(l->lb_str, str) != 0)
continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Leaving this in as an example */
if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
if (str[0] == 'w' && strcmp(str, "with") == 0)
break; /* not a keyword yet */
else if (str[0] == 'a' && strcmp(str, "as") == 0)
break; /* not a keyword yet */
}
#endif
#endif
D(printf("It's a keyword\n"));
return n - i;
}
}
{
label *l = g->g_ll.ll_label;
int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type == type && l->lb_str == NULL) {
D(printf("It's a token we know\n"));
return n - i;
}
}
}
D(printf("Illegal token\n"));
return -1;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Leaving this in as an example */
static void
future_hack(parser_state *ps)
{
node *n = ps->p_stack.s_top->s_parent;
node *ch, *cch;
int i;
/* from __future__ import ..., must have at least 4 children */
n = CHILD(n, 0);
if (NCH(n) < 4)
return;
ch = CHILD(n, 0);
if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
return;
ch = CHILD(n, 1);
if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
return;
ch = CHILD(n, 3);
/* ch can be a star, a parenthesis or import_as_names */
if (TYPE(ch) == STAR)
return;
if (TYPE(ch) == LPAR)
ch = CHILD(n, 4);
for (i = 0; i < NCH(ch); i += 2) {
cch = CHILD(ch, i);
if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
char *str_ch = STR(CHILD(cch, 0));
if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
}
}
}
}
#endif
#endif /* future keyword */
int
PyParser_AddToken(parser_state *ps, int type, char *str,
int lineno, int col_offset, int *expected_ret)
{
int ilabel;
int err;
D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
/* Find out which label this token is */
ilabel = classify(ps, type, str);
if (ilabel < 0)
return E_SYNTAX;
/* Loop until the token is shifted or an error occurred */
for (;;) {
/* Fetch the current dfa and state */
dfa *d = ps->p_stack.s_top->s_dfa;
state *s = &d->d_state[ps->p_stack.s_top->s_state];
D(printf(" DFA '%s', state %d:",
d->d_name, ps->p_stack.s_top->s_state));
/* Check accelerator */
if (s->s_lower <= ilabel && ilabel < s->s_upper) {
int x = s->s_accel[ilabel - s->s_lower];
if (x != -1) {
if (x & (1<<7)) {
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset)) > 0) {
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
continue;
}
/* Shift the token */
if ((err = shift(&ps->p_stack, type, str,
x, lineno, col_offset)) > 0) {
D(printf(" MemError: shift.\n"));
return err;
}
D(printf(" Shift.\n"));
/* Pop while we are in an accept-only state */
while (s = &d->d_state
[ps->p_stack.s_top->s_state],
s->s_accept && s->s_narcs == 1) {
D(printf(" DFA '%s', state %d: "
"Direct pop.\n",
d->d_name,
ps->p_stack.s_top->s_state));
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
if (d->d_name[0] == 'i' &&
strcmp(d->d_name,
"import_stmt") == 0)
future_hack(ps);
#endif
#endif
s_pop(&ps->p_stack);
if (s_empty(&ps->p_stack)) {
D(printf(" ACCEPT.\n"));
return E_DONE;
}
d = ps->p_stack.s_top->s_dfa;
}
return E_OK;
}
}
if (s->s_accept) {
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
if (d->d_name[0] == 'i' &&
strcmp(d->d_name, "import_stmt") == 0)
future_hack(ps);
#endif
#endif
/* Pop this dfa and try again */
s_pop(&ps->p_stack);
D(printf(" Pop ...\n"));
if (s_empty(&ps->p_stack)) {
D(printf(" Error: bottom of stack.\n"));
return E_SYNTAX;
}
continue;
}
/* Stuck, report syntax error */
D(printf(" Error.\n"));
if (expected_ret) {
if (s->s_lower == s->s_upper - 1) {
/* Only one possible expected token */
*expected_ret = ps->p_grammar->
g_ll.ll_label[s->s_lower].lb_type;
}
else
*expected_ret = -1;
}
return E_SYNTAX;
}
}
#ifdef Py_DEBUG
/* DEBUG OUTPUT */
void
dumptree(grammar *g, node *n)
{
int i;
if (n == NULL)
printf("NIL");
else {
label l;
l.lb_type = TYPE(n);
l.lb_str = STR(n);
printf("%s", PyGrammar_LabelRepr(&l));
if (ISNONTERMINAL(TYPE(n))) {
printf("(");
for (i = 0; i < NCH(n); i++) {
if (i > 0)
printf(",");
dumptree(g, CHILD(n, i));
}
printf(")");
}
}
}
void
showtree(grammar *g, node *n)
{
int i;
if (n == NULL)
return;
if (ISNONTERMINAL(TYPE(n))) {
for (i = 0; i < NCH(n); i++)
showtree(g, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
printf("%s", _PyParser_TokenNames[TYPE(n)]);
if (TYPE(n) == NUMBER || TYPE(n) == NAME)
printf("(%s)", STR(n));
printf(" ");
}
else
printf("? ");
}
void
printtree(parser_state *ps)
{
if (Py_DebugFlag) {
printf("Parse tree:\n");
dumptree(ps->p_grammar, ps->p_tree);
printf("\n");
printf("Tokens:\n");
showtree(ps->p_grammar, ps->p_tree);
printf("\n");
}
printf("Listing:\n");
PyNode_ListTree(ps->p_tree);
printf("\n");
}
#endif /* Py_DEBUG */
/*
Description
-----------
The parser's interface is different from the usual: the function addtoken()
must be called for each token in the input. This makes it possible to
turn it into an incremental parsing system later. The parsing system
constructs a parse tree as it goes.
A parsing rule is represented as a Deterministic Finite-state Automaton
(DFA). A node in a DFA represents a state of the parser; an arc represents
a transition. Transitions are either labeled with terminal symbols or
with non-terminals. When the parser decides to follow an arc labeled
with a non-terminal, it is invoked recursively with the DFA representing
the parsing rule for that as its initial state; when that DFA accepts,
the parser that invoked it continues. The parse tree constructed by the
recursively called parser is inserted as a child in the current parse tree.
The DFAs can be constructed automatically from a more conventional
language description. An extended LL(1) grammar (ELL(1)) is suitable.
Certain restrictions make the parser's life easier: rules that can produce
the empty string should be outlawed (there are other ways to put loops
or optional parts in the language). To avoid the need to construct
FIRST sets, we can require that all but the last alternative of a rule
(really: arc going out of a DFA's state) must begin with a terminal
symbol.
As an example, consider this grammar:
expr: term (OP term)*
term: CONSTANT | '(' expr ')'
The DFA corresponding to the rule for expr is:
------->.---term-->.------->
^ |
| |
\----OP----/
The parse tree generated for the input a+b is:
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
*/
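
The description above compresses well into a toy model. The following sketch of the PyParser_AddToken() loop is an assumption-laden re-implementation, not the C data layout: labels are plain strings, FIRST sets are written out by hand, and an ("", state) arc marks an accepting state (s_accept). It parses CONSTANT OP CONSTANT against the example grammar:

    FIRST = {"expr": {"CONSTANT", "("}, "term": {"CONSTANT", "("}}
    DFAS = {   # nonterminal -> {state: [(label, target), ...]}
        "expr": {0: [("term", 1)], 1: [("OP", 0), ("", 1)]},
        "term": {0: [("CONSTANT", 1), ("(", 2)], 1: [("", 1)],
                 2: [("expr", 3)], 3: [(")", 1)]},
    }

    def addtoken(stack, tok):
        while True:
            dfa, state = stack[-1]
            arcs = DFAS[dfa][state]
            for label, target in arcs:
                if label == tok:                  # shift the token
                    stack[-1] = (dfa, target)
                    # pop while in an accept-only state (s_accept, one arc)
                    while stack and \
                          DFAS[stack[-1][0]][stack[-1][1]] == [("", stack[-1][1])]:
                        stack.pop()
                    return
                if label in DFAS and tok in FIRST[label]:
                    stack[-1] = (dfa, target)     # advance, then push sub-DFA
                    stack.append((label, 0))
                    break
            else:
                if ("", state) in arcs and len(stack) > 1:
                    stack.pop()                   # this DFA accepts; retry
                    continue
                raise SyntaxError("unexpected %r" % tok)

    stack = [("expr", 0)]
    for tok in ["CONSTANT", "OP", "CONSTANT"]:
        addtoken(stack, tok)
    print(("", stack[-1][1]) in DFAS[stack[-1][0]][stack[-1][1]])   # True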

third_party/python/Parser/parser.h vendored Normal file

@@ -0,0 +1,42 @@
#ifndef Py_PARSER_H
#define Py_PARSER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Parser interface */
#define MAXSTACK 1500
typedef struct {
int s_state; /* State in current DFA */
dfa *s_dfa; /* Current DFA */
struct _node *s_parent; /* Where to add next node */
} stackentry;
typedef struct {
stackentry *s_top; /* Top entry */
stackentry s_base[MAXSTACK];/* Array of stack entries */
/* NB The stack grows down */
} stack;
typedef struct {
stack p_stack; /* Stack of parser states */
grammar *p_grammar; /* Grammar to use */
node *p_tree; /* Top of parse tree */
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
unsigned long p_flags; /* see co_flags in Include/code.h */
#endif
} parser_state;
parser_state *PyParser_New(grammar *g, int start);
void PyParser_Delete(parser_state *ps);
int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
int *expected_ret);
void PyGrammar_AddAccelerators(grammar *g);
#ifdef __cplusplus
}
#endif
#endif /* !Py_PARSER_H */

third_party/python/Parser/parsetok.c vendored Normal file

@@ -0,0 +1,384 @@
/* Parser-tokenizer link implementation */
#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
perrdetail *err_ret, int flags)
{
return PyParser_ParseStringFlagsFilename(s, NULL,
g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
grammar *g, int start,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
err_ret, &iflags);
}
node *
PyParser_ParseStringObject(const char *s, PyObject *filename,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
int exec_input = start == file_input;
if (initerr(err_ret, filename) < 0)
return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
else
tok = PyTokenizer_FromString(s, exec_input);
if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *filename = NULL;
#ifndef PGEN
if (filename_str != NULL) {
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
#endif
n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
#ifndef PGEN
Py_XDECREF(filename);
#endif
return n;
}
/* Parse input coming from a file. Return error code, print some errors. */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret)
{
return PyParser_ParseFileFlags(fp, filename, NULL,
g, start, ps1, ps2, err_ret, 0);
}
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
ps2, err_ret, &iflags);
}
node *
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
if (initerr(err_ret, filename) < 0)
return NULL;
if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *fileobj = NULL;
#ifndef PGEN
if (filename != NULL) {
fileobj = PyUnicode_DecodeFSDefault(filename);
if (fileobj == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
#endif
n = PyParser_ParseFileObject(fp, fileobj, enc, g,
start, ps1, ps2, err_ret, flags);
#ifndef PGEN
Py_XDECREF(fileobj);
#endif
return n;
}
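/* Editor's sketch (illustrative, same assumptions as the sketch above):
   parsing a whole file in one call.  ps1/ps2 are only used for
   interactive prompting, so NULL is fine for a regular file. */
#if 0
static node *
example_parse_file(FILE *fp, const char *path)
{
    perrdetail err;
    return PyParser_ParseFile(fp, path, &_PyParser_Grammar, file_input,
                              NULL, NULL, &err);
}
#endif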
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
static void
warn(const char *msg, const char *filename, int lineno)
{
if (filename == NULL)
filename = "<string>";
PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
/* Parse input coming from the given tokenizer structure.
Return error code. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
int *flags)
{
parser_state *ps;
node *n;
int started = 0;
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
#endif
for (;;) {
char *a, *b;
int type;
size_t len;
char *str;
int col_offset;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
err_ret->error = tok->done;
break;
}
if (type == ENDMARKER && started) {
type = NEWLINE; /* Add an extra newline */
started = 0;
            /* Add the right number of dedent tokens,
               except if PyPARSE_DONT_IMPLY_DEDENT is given --
               codeop.py uses this. */
if (tok->indent &&
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
{
tok->pendin = -tok->indent;
tok->indent = 0;
}
}
else
started = 1;
len = (a != NULL && b != NULL) ? b - a : 0;
str = (char *) PyObject_MALLOC(len + 1);
if (str == NULL) {
err_ret->error = E_NOMEM;
break;
}
if (len > 0)
strncpy(str, a, len);
str[len] = '\0';
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (type == NOTEQUAL) {
if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "!=")) {
PyObject_FREE(str);
err_ret->error = E_SYNTAX;
break;
}
else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "<>")) {
PyObject_FREE(str);
err_ret->expected = NOTEQUAL;
err_ret->error = E_SYNTAX;
break;
}
}
#endif
if (a != NULL && a >= tok->line_start) {
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
intptr_t, int);
}
else {
col_offset = -1;
}
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
tok->lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
err_ret->token = type;
}
break;
}
}
if (err_ret->error == E_DONE) {
n = ps->p_tree;
ps->p_tree = NULL;
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
buffer after parsing. Trailing whitespace and comments
are OK. */
if (start == single_input) {
char *cur = tok->cur;
char c = *tok->cur;
for (;;) {
while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
c = *++cur;
if (!c)
break;
if (c != '#') {
err_ret->error = E_BADSINGLE;
PyNode_Free(n);
n = NULL;
break;
}
/* Suck up comment. */
while (c && c != '\n')
c = *++cur;
}
}
#endif
}
else
n = NULL;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
*flags = ps->p_flags;
#endif
PyParser_Delete(ps);
if (n == NULL) {
if (tok->done == E_EOF)
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
size_t len;
assert(tok->cur - tok->buf < INT_MAX);
err_ret->offset = (int)(tok->cur - tok->buf);
len = tok->inp - tok->buf;
err_ret->text = (char *) PyObject_MALLOC(len + 1);
if (err_ret->text != NULL) {
if (len > 0)
strncpy(err_ret->text, tok->buf, len);
err_ret->text[len] = '\0';
}
}
} else if (tok->encoding != NULL) {
/* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
* allocated using PyMem_
*/
node* r = PyNode_New(encoding_decl);
if (r)
r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
if (r)
PyObject_FREE(r);
n = NULL;
goto done;
}
strcpy(r->n_str, tok->encoding);
PyMem_FREE(tok->encoding);
tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
n = r;
}
done:
PyTokenizer_Free(tok);
return n;
}
static int
initerr(perrdetail *err_ret, PyObject *filename)
{
err_ret->error = E_OK;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
#ifndef PGEN
if (filename) {
Py_INCREF(filename);
err_ret->filename = filename;
}
else {
err_ret->filename = PyUnicode_FromString("<string>");
if (err_ret->filename == NULL) {
err_ret->error = E_ERROR;
return -1;
}
}
#endif
return 0;
}

2
third_party/python/Parser/parsetok_pgen.c vendored Normal file
View file

@ -0,0 +1,2 @@
#define PGEN
#include "parsetok.c"

724
third_party/python/Parser/pgen.c vendored Normal file
View file

@ -0,0 +1,724 @@
/* Parser generator */
/* For a description, see the comments at end of this file */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
#include "grammar.h"
#include "metagrammar.h"
#include "pgen.h"
extern int Py_DebugFlag;
extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
typedef struct _nfaarc {
int ar_label;
int ar_arrow;
} nfaarc;
typedef struct _nfastate {
int st_narcs;
nfaarc *st_arc;
} nfastate;
typedef struct _nfa {
int nf_type;
char *nf_name;
int nf_nstates;
nfastate *nf_state;
int nf_start, nf_finish;
} nfa;
/* Forward */
static void compile_rhs(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_alt(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_item(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_atom(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static int
addnfastate(nfa *nf)
{
nfastate *st;
nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
sizeof(nfastate) * (nf->nf_nstates + 1));
if (nf->nf_state == NULL)
Py_FatalError("out of mem");
st = &nf->nf_state[nf->nf_nstates++];
st->st_narcs = 0;
st->st_arc = NULL;
return st - nf->nf_state;
}
static void
addnfaarc(nfa *nf, int from, int to, int lbl)
{
nfastate *st;
nfaarc *ar;
st = &nf->nf_state[from];
st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
sizeof(nfaarc) * (st->st_narcs + 1));
if (st->st_arc == NULL)
Py_FatalError("out of mem");
ar = &st->st_arc[st->st_narcs++];
ar->ar_label = lbl;
ar->ar_arrow = to;
}
static nfa *
newnfa(char *name)
{
nfa *nf;
    static int type = NT_OFFSET; /* All types will be disjoint */
nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
if (nf == NULL)
Py_FatalError("no mem for new nfa");
nf->nf_type = type++;
nf->nf_name = name; /* XXX strdup(name) ??? */
nf->nf_nstates = 0;
nf->nf_state = NULL;
nf->nf_start = nf->nf_finish = -1;
return nf;
}
typedef struct _nfagrammar {
int gr_nnfas;
nfa **gr_nfa;
labellist gr_ll;
} nfagrammar;
/* Forward */
static void compile_rule(nfagrammar *gr, node *n);
static nfagrammar *
newnfagrammar(void)
{
nfagrammar *gr;
gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
if (gr == NULL)
Py_FatalError("no mem for new nfa grammar");
gr->gr_nnfas = 0;
gr->gr_nfa = NULL;
gr->gr_ll.ll_nlabels = 0;
gr->gr_ll.ll_label = NULL;
addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
return gr;
}
static void
freenfagrammar(nfagrammar *gr)
{
for (int i = 0; i < gr->gr_nnfas; i++) {
PyObject_FREE(gr->gr_nfa[i]->nf_state);
}
PyObject_FREE(gr->gr_nfa);
PyObject_FREE(gr);
}
static nfa *
addnfa(nfagrammar *gr, char *name)
{
nfa *nf;
nf = newnfa(name);
gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
sizeof(nfa*) * (gr->gr_nnfas + 1));
if (gr->gr_nfa == NULL)
Py_FatalError("out of mem");
gr->gr_nfa[gr->gr_nnfas++] = nf;
addlabel(&gr->gr_ll, NAME, nf->nf_name);
return nf;
}
#ifdef Py_DEBUG
static const char REQNFMT[] = "metacompile: less than %d children\n";
#define REQN(i, count) do { \
if (i < count) { \
fprintf(stderr, REQNFMT, count); \
Py_FatalError("REQN"); \
} \
} while (0)
#else
#define REQN(i, count) /* empty */
#endif
static nfagrammar *
metacompile(node *n)
{
nfagrammar *gr;
int i;
if (Py_DebugFlag)
printf("Compiling (meta-) parse tree into NFA grammar\n");
gr = newnfagrammar();
REQ(n, MSTART);
i = n->n_nchildren - 1; /* Last child is ENDMARKER */
n = n->n_child;
for (; --i >= 0; n++) {
if (n->n_type != NEWLINE)
compile_rule(gr, n);
}
return gr;
}
static void
compile_rule(nfagrammar *gr, node *n)
{
nfa *nf;
REQ(n, RULE);
REQN(n->n_nchildren, 4);
n = n->n_child;
REQ(n, NAME);
nf = addnfa(gr, n->n_str);
n++;
REQ(n, COLON);
n++;
REQ(n, RHS);
compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
n++;
REQ(n, NEWLINE);
}
static void
compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, RHS);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ALT);
compile_alt(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
a = *pa;
b = *pb;
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
for (; --i >= 0; n++) {
REQ(n, VBAR);
REQN(i, 1);
--i;
n++;
REQ(n, ALT);
compile_alt(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
}
}
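/* Editor's note: for a rule "t: a | b", compile_rhs allocates fresh
   start/finish states and ties each alternative in with EMPTY arcs:

       *pa --EMPTY--> (a) --EMPTY--> *pb
       *pa --EMPTY--> (b) --EMPTY--> *pb

   i.e. the standard textbook construction for alternation. */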
static void
compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ALT);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ITEM);
compile_item(ll, nf, n, pa, pb);
--i;
n++;
for (; --i >= 0; n++) {
REQ(n, ITEM);
compile_item(ll, nf, n, &a, &b);
addnfaarc(nf, *pb, a, EMPTY);
*pb = b;
}
}
static void
compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ITEM);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
if (n->n_type == LSQB) {
REQN(i, 3);
n++;
REQ(n, RHS);
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, *pb, EMPTY);
compile_rhs(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
REQN(i, 1);
n++;
REQ(n, RSQB);
}
else {
compile_atom(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
addnfaarc(nf, *pb, *pa, EMPTY);
if (n->n_type == STAR)
*pb = *pa;
else
REQ(n, PLUS);
}
}
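/* Editor's note: for an item "x*", compile_atom first builds
   *pa --x--> *pb; the EMPTY arc added above then runs from *pb back to
   *pa, and assigning *pb = *pa makes zero repetitions acceptable.  For
   "x+" the back arc is kept but the states stay distinct, so at least
   one x is required.  "[x]" instead gets a direct *pa --EMPTY--> *pb
   bypass around the bracketed rhs. */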
static void
compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
REQ(n, ATOM);
i = n->n_nchildren;
(void)i; /* Don't warn about set but unused */
REQN(i, 1);
n = n->n_child;
if (n->n_type == LPAR) {
REQN(i, 3);
n++;
REQ(n, RHS);
compile_rhs(ll, nf, n, pa, pb);
n++;
REQ(n, RPAR);
}
else if (n->n_type == NAME || n->n_type == STRING) {
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
}
else
REQ(n, NAME);
}
static void
dumpstate(labellist *ll, nfa *nf, int istate)
{
nfastate *st;
int i;
nfaarc *ar;
printf("%c%2d%c",
istate == nf->nf_start ? '*' : ' ',
istate,
istate == nf->nf_finish ? '.' : ' ');
st = &nf->nf_state[istate];
ar = st->st_arc;
for (i = 0; i < st->st_narcs; i++) {
if (i > 0)
printf("\n ");
printf("-> %2d %s", ar->ar_arrow,
PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
ar++;
}
printf("\n");
}
static void
dumpnfa(labellist *ll, nfa *nf)
{
int i;
printf("NFA '%s' has %d states; start %d, finish %d\n",
nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
for (i = 0; i < nf->nf_nstates; i++)
dumpstate(ll, nf, i);
}
/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
static void
addclosure(bitset ss, nfa *nf, int istate)
{
if (addbit(ss, istate)) {
nfastate *st = &nf->nf_state[istate];
nfaarc *ar = st->st_arc;
int i;
for (i = st->st_narcs; --i >= 0; ) {
if (ar->ar_label == EMPTY)
addclosure(ss, nf, ar->ar_arrow);
ar++;
}
}
}
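/* Editor's note: addclosure computes the epsilon closure of istate,
   i.e. the set of all NFA states reachable through EMPTY arcs alone.
   The addbit() guard (true only when the bit was newly set) makes the
   recursion terminate even if the EMPTY arcs form a cycle. */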
typedef struct _ss_arc {
bitset sa_bitset;
int sa_arrow;
int sa_label;
} ss_arc;
typedef struct _ss_state {
bitset ss_ss;
int ss_narcs;
struct _ss_arc *ss_arc;
int ss_deleted;
int ss_finish;
int ss_rename;
} ss_state;
typedef struct _ss_dfa {
int sd_nstates;
ss_state *sd_state;
} ss_dfa;
/* Forward */
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, const char *msg);
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
static void
makedfa(nfagrammar *gr, nfa *nf, dfa *d)
{
int nbits = nf->nf_nstates;
bitset ss;
int xx_nstates;
ss_state *xx_state, *yy;
ss_arc *zz;
int istate, jstate, iarc, jarc, ibit;
nfastate *st;
nfaarc *ar;
ss = newbitset(nbits);
addclosure(ss, nf, nf->nf_start);
xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
if (xx_state == NULL)
Py_FatalError("no mem for xx_state in makedfa");
xx_nstates = 1;
yy = &xx_state[0];
yy->ss_ss = ss;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(ss, nf->nf_finish);
if (yy->ss_finish)
printf("Error: nonterminal '%s' may produce empty.\n",
nf->nf_name);
/* This algorithm is from a book written before
the invention of structured programming... */
/* For each unmarked state... */
for (istate = 0; istate < xx_nstates; ++istate) {
size_t size;
yy = &xx_state[istate];
ss = yy->ss_ss;
/* For all its states... */
for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
if (!testbit(ss, ibit))
continue;
st = &nf->nf_state[ibit];
/* For all non-empty arcs from this state... */
for (iarc = 0; iarc < st->st_narcs; iarc++) {
ar = &st->st_arc[iarc];
if (ar->ar_label == EMPTY)
continue;
/* Look up in list of arcs from this state */
for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
zz = &yy->ss_arc[jarc];
if (ar->ar_label == zz->sa_label)
goto found;
}
/* Add new arc for this state */
size = sizeof(ss_arc) * (yy->ss_narcs + 1);
yy->ss_arc = (ss_arc *)PyObject_REALLOC(
yy->ss_arc, size);
if (yy->ss_arc == NULL)
Py_FatalError("out of mem");
zz = &yy->ss_arc[yy->ss_narcs++];
zz->sa_label = ar->ar_label;
zz->sa_bitset = newbitset(nbits);
zz->sa_arrow = -1;
found: ;
/* Add destination */
addclosure(zz->sa_bitset, nf, ar->ar_arrow);
}
}
/* Now look up all the arrow states */
for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
zz = &xx_state[istate].ss_arc[jarc];
for (jstate = 0; jstate < xx_nstates; jstate++) {
if (samebitset(zz->sa_bitset,
xx_state[jstate].ss_ss, nbits)) {
zz->sa_arrow = jstate;
goto done;
}
}
size = sizeof(ss_state) * (xx_nstates + 1);
xx_state = (ss_state *)PyObject_REALLOC(xx_state,
size);
if (xx_state == NULL)
Py_FatalError("out of mem");
zz->sa_arrow = xx_nstates;
yy = &xx_state[xx_nstates++];
yy->ss_ss = zz->sa_bitset;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
done: ;
}
}
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"before minimizing");
simplify(xx_nstates, xx_state);
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"after minimizing");
convert(d, xx_nstates, xx_state);
for (int i = 0; i < xx_nstates; i++) {
for (int j = 0; j < xx_state[i].ss_narcs; j++)
delbitset(xx_state[i].ss_arc[j].sa_bitset);
PyObject_FREE(xx_state[i].ss_arc);
}
PyObject_FREE(xx_state);
}
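/* Editor's note: makedfa is the textbook subset construction.  Each
   ss_state holds a bitset of NFA states; starting from the closure of
   nf_start, every distinct closure reachable on a non-EMPTY label
   becomes a new DFA state, and a DFA state is accepting (ss_finish)
   iff its set contains nf_finish. */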
static void
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, const char *msg)
{
int i, ibit, iarc;
ss_state *yy;
ss_arc *zz;
printf("Subset DFA %s\n", msg);
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
printf(" Subset %d", i);
if (yy->ss_finish)
printf(" (finish)");
printf(" { ");
for (ibit = 0; ibit < nbits; ibit++) {
if (testbit(yy->ss_ss, ibit))
printf("%d ", ibit);
}
printf("}\n");
for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
zz = &yy->ss_arc[iarc];
printf(" Arc to state %d, label %s\n",
zz->sa_arrow,
PyGrammar_LabelRepr(
&ll->ll_label[zz->sa_label]));
}
}
}
/* PART THREE -- SIMPLIFY DFA */
/* Simplify the DFA by repeatedly eliminating states that are
   equivalent to another one.  This is NOT Algorithm 3.3 from
   [Aho&Ullman 77].  It does not always find the minimal DFA,
   but it does usually make a much smaller one...  (For an example
   of sub-optimal behavior, try S: x a b+ | y a b+.)
*/
static int
samestate(ss_state *s1, ss_state *s2)
{
int i;
if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
return 0;
for (i = 0; i < s1->ss_narcs; i++) {
if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
return 0;
}
return 1;
}
static void
renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
{
int i, j;
if (Py_DebugFlag)
printf("Rename state %d to %d.\n", from, to);
for (i = 0; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < xx_state[i].ss_narcs; j++) {
if (xx_state[i].ss_arc[j].sa_arrow == from)
xx_state[i].ss_arc[j].sa_arrow = to;
}
}
}
static void
simplify(int xx_nstates, ss_state *xx_state)
{
int changes;
int i, j;
do {
changes = 0;
for (i = 1; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < i; j++) {
if (xx_state[j].ss_deleted)
continue;
if (samestate(&xx_state[i], &xx_state[j])) {
xx_state[i].ss_deleted++;
renamestates(xx_nstates, xx_state,
i, j);
changes++;
break;
}
}
}
} while (changes);
}
/* PART FOUR -- GENERATE PARSING TABLES */
/* Convert the DFA into a grammar that can be used by our parser */
static void
convert(dfa *d, int xx_nstates, ss_state *xx_state)
{
int i, j;
ss_state *yy;
ss_arc *zz;
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
yy->ss_rename = addstate(d);
}
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
for (j = 0; j < yy->ss_narcs; j++) {
zz = &yy->ss_arc[j];
addarc(d, yy->ss_rename,
xx_state[zz->sa_arrow].ss_rename,
zz->sa_label);
}
        /* An arc labelled 0 (EMPTY) back to itself marks an accepting state */
        if (yy->ss_finish)
            addarc(d, yy->ss_rename, yy->ss_rename, 0);
}
d->d_initial = 0;
}
/* PART FIVE -- GLUE IT ALL TOGETHER */
static grammar *
maketables(nfagrammar *gr)
{
int i;
nfa *nf;
dfa *d;
grammar *g;
if (gr->gr_nnfas == 0)
return NULL;
g = newgrammar(gr->gr_nfa[0]->nf_type);
/* XXX first rule must be start rule */
g->g_ll = gr->gr_ll;
for (i = 0; i < gr->gr_nnfas; i++) {
nf = gr->gr_nfa[i];
if (Py_DebugFlag) {
printf("Dump of NFA for '%s' ...\n", nf->nf_name);
dumpnfa(&gr->gr_ll, nf);
printf("Making DFA for '%s' ...\n", nf->nf_name);
}
d = adddfa(g, nf->nf_type, nf->nf_name);
makedfa(gr, gr->gr_nfa[i], d);
}
return g;
}
grammar *
pgen(node *n)
{
nfagrammar *gr;
grammar *g;
gr = metacompile(n);
g = maketables(gr);
translatelabels(g);
addfirstsets(g);
freenfagrammar(gr);
return g;
}
grammar *
Py_pgen(node *n)
{
return pgen(n);
}
/*
Description
-----------
Input is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and
() for grouping). This has already been parsed and turned into a parse
tree.
Each rule is considered as a regular expression in its own right.
It is turned into a Non-deterministic Finite Automaton (NFA), which
is then turned into a Deterministic Finite Automaton (DFA), which is then
optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
or similar compiler books (this technique is more often used for lexical
analyzers).
The DFA's are used by the parser as parsing tables in a special way
that's probably unique. Before they are usable, the FIRST sets of all
non-terminals are computed.
Reference
---------
[Aho&Ullman 77]
Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
(first edition)
*/
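/* Editor's addendum -- a small end-to-end illustration (hypothetical
   rule, not from Grammar/Grammar):

       expr: term ('+' term)*

   pgen() compiles the repetition into an NFA loop, determinizes it via
   the subset construction above, merges equivalent DFA states, and
   emits the result as the arcs_* / states_* / dfas initializers that
   printgrammar.c writes into graminit.c.  addfirstsets() then computes
   the FIRST set of each nonterminal so the parser can choose among
   alternatives with a single token of lookahead. */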

189
third_party/python/Parser/pgenmain.c vendored Normal file
View file

@ -0,0 +1,189 @@
/* Parser generator main program */
/* This expects a filename containing the grammar as argv[1] (UNIX)
or asks the console for such a file name (THINK C).
It writes its output on two files in the current directory:
- "graminit.c" gets the grammar as a bunch of initialized data
- "graminit.h" gets the grammar's non-terminals as #defines.
Error messages and status info during the generation process are
written to stdout, or sometimes to stderr. */
/* XXX TO DO:
- check for duplicate definitions of names (instead of fatal err)
*/
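/* Example invocation (editor's note): the CPython build runs this
   program roughly as

       pgen Grammar/Grammar Include/graminit.h Python/graminit.c

   regenerating the parsing tables whenever the grammar changes. */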
#define PGEN
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "parsetok.h"
#include "pgen.h"
int Py_DebugFlag;
int Py_VerboseFlag;
int Py_IgnoreEnvironmentFlag;
/* Forward */
grammar *getgrammar(const char *filename);
void Py_Exit(int) _Py_NO_RETURN;
void
Py_Exit(int sts)
{
exit(sts);
}
#ifdef WITH_THREAD
/* Needed by obmalloc.c */
int PyGILState_Check(void)
{ return 1; }
#endif
void _PyMem_DumpTraceback(int fd, const void *ptr)
{}
int
main(int argc, char **argv)
{
grammar *g;
FILE *fp;
char *filename, *graminit_h, *graminit_c;
if (argc != 4) {
fprintf(stderr,
"usage: %s grammar graminit.h graminit.c\n", argv[0]);
Py_Exit(2);
}
filename = argv[1];
graminit_h = argv[2];
graminit_c = argv[3];
g = getgrammar(filename);
fp = fopen(graminit_c, "w");
if (fp == NULL) {
perror(graminit_c);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_c);
printgrammar(g, fp);
fclose(fp);
fp = fopen(graminit_h, "w");
if (fp == NULL) {
perror(graminit_h);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_h);
printnonterminals(g, fp);
fclose(fp);
freegrammar(g);
Py_Exit(0);
return 0; /* Make gcc -Wall happy */
}
grammar *
getgrammar(const char *filename)
{
FILE *fp;
node *n;
grammar *g0, *g;
perrdetail err;
fp = fopen(filename, "r");
if (fp == NULL) {
perror(filename);
Py_Exit(1);
}
g0 = meta_grammar();
n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
(char *)NULL, (char *)NULL, &err);
fclose(fp);
if (n == NULL) {
fprintf(stderr, "Parsing error %d, line %d.\n",
err.error, err.lineno);
if (err.text != NULL) {
size_t len;
int i;
fprintf(stderr, "%s", err.text);
len = strlen(err.text);
if (len == 0 || err.text[len-1] != '\n')
fprintf(stderr, "\n");
for (i = 0; i < err.offset; i++) {
if (err.text[i] == '\t')
putc('\t', stderr);
else
putc(' ', stderr);
}
fprintf(stderr, "^\n");
PyObject_FREE(err.text);
}
Py_Exit(1);
}
g = pgen(n);
PyNode_Free(n);
if (g == NULL) {
printf("Bad grammar.\n");
Py_Exit(1);
}
return g;
}
/* Can't happen in pgen */
PyObject*
PyErr_Occurred()
{
return 0;
}
void
Py_FatalError(const char *msg)
{
fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
Py_Exit(1);
}
/* No-nonsense my_readline() for tokenizer.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
size_t n = 1000;
char *p = (char *)PyMem_MALLOC(n);
char *q;
if (p == NULL)
return NULL;
fprintf(stderr, "%s", prompt);
q = fgets(p, n, sys_stdin);
if (q == NULL) {
*p = '\0';
return p;
}
    n = strlen(p);
    /* If the line hit EOF or was truncated without a newline, force one
       by overwriting the last character -- crude, but sufficient for
       feeding a grammar file to the tokenizer. */
    if (n > 0 && p[n-1] != '\n')
        p[n-1] = '\n';
    return (char *)PyMem_REALLOC(p, n+1);
}
/* No-nonsense fgets */
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
return fgets(buf, n, stream);
}
#include <stdarg.h>
void
PySys_WriteStderr(const char *format, ...)
{
va_list va;
va_start(va, format);
vfprintf(stderr, format, va);
va_end(va);
}

120
third_party/python/Parser/printgrammar.c vendored Normal file
View file

@ -0,0 +1,120 @@
/* Print a bunch of C initializers that represent a grammar */
#define PGEN
#include "pgenheaders.h"
#include "grammar.h"
/* Forward */
static void printarcs(int, dfa *, FILE *);
static void printstates(grammar *, FILE *);
static void printdfas(grammar *, FILE *);
static void printlabels(grammar *, FILE *);
void
printgrammar(grammar *g, FILE *fp)
{
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
fprintf(fp, "#include \"pgenheaders.h\"\n");
fprintf(fp, "#include \"grammar.h\"\n");
fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
printdfas(g, fp);
printlabels(g, fp);
fprintf(fp, "grammar _PyParser_Grammar = {\n");
fprintf(fp, " %d,\n", g->g_ndfas);
fprintf(fp, " dfas,\n");
fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels);
fprintf(fp, " %d\n", g->g_start);
fprintf(fp, "};\n");
}
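/* Editor's note: the emitted graminit.c has roughly this shape (the
   values below are hypothetical; the real numbers depend on the
   grammar):

       static arc arcs_0_0[3] = { {2, 1}, {3, 1}, {4, 2} };
       static state states_0[3] = { {3, arcs_0_0}, ... };
       static dfa dfas[N] = { {256, "single_input", 0, 3, states_0,
                               "\004\060\002\000..."}, ... };
       static label labels[M] = { {0, "EMPTY"}, ... };
       grammar _PyParser_Grammar = { N, dfas, {M, labels}, 256 };
*/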
void
printnonterminals(grammar *g, FILE *fp)
{
dfa *d;
int i;
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
}
static void
printarcs(int i, dfa *d, FILE *fp)
{
arc *a;
state *s;
int j, k;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
i, j, s->s_narcs);
a = s->s_arc;
for (k = 0; k < s->s_narcs; k++, a++)
fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow);
fprintf(fp, "};\n");
}
}
static void
printstates(grammar *g, FILE *fp)
{
state *s;
dfa *d;
int i, j;
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
printarcs(i, d, fp);
fprintf(fp, "static state states_%d[%d] = {\n",
i, d->d_nstates);
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fprintf(fp, " {%d, arcs_%d_%d},\n",
s->s_narcs, i, j);
fprintf(fp, "};\n");
}
}
static void
printdfas(grammar *g, FILE *fp)
{
dfa *d;
int i, j, n;
printstates(g, fp);
fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n",
d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
fprintf(fp, " \"");
n = NBYTES(g->g_ll.ll_nlabels);
for (j = 0; j < n; j++)
fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
fprintf(fp, "\"},\n");
}
fprintf(fp, "};\n");
}
static void
printlabels(grammar *g, FILE *fp)
{
label *l;
int i;
fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
l = g->g_ll.ll_label;
for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
if (l->lb_str == NULL)
fprintf(fp, " {%d, 0},\n", l->lb_type);
else
fprintf(fp, " {%d, \"%s\"},\n",
l->lb_type, l->lb_str);
}
fprintf(fp, "};\n");
}

2000
third_party/python/Parser/tokenizer.c vendored Normal file

File diff suppressed because it is too large

89
third_party/python/Parser/tokenizer.h vendored Normal file
View file

@ -0,0 +1,89 @@
#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif
#include "object.h"
/* Tokenizer interface */
#include "token.h" /* For token types */
#define MAXINDENT 100 /* Max indentation level */
enum decoding_state {
STATE_INIT,
STATE_RAW,
STATE_NORMAL /* have a codec associated with input */
};
/* Tokenizer state */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
char *end; /* End of input buffer if buf != NULL */
char *start; /* Start of current token if not NULL */
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
/* NB If done != E_OK, cur must be == inp!!! */
FILE *fp; /* Rest of input; NULL if tokenizing a string */
int tabsize; /* Tab spacing */
int indent; /* Current indentation index */
int indstack[MAXINDENT]; /* Stack of indents */
int atbol; /* Nonzero if at begin of new line */
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
const char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
#ifndef PGEN
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject *filename;
#endif
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */
enum decoding_state decoding_state;
int decoding_erred; /* whether erred in decoding */
int read_coding_spec; /* whether 'coding:...' has been read */
char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
#endif
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
/* async/await related fields; can be removed in 3.7 when async and await
become normal keywords. */
int async_def; /* =1 if tokens are inside an 'async def' body. */
int async_def_indent; /* Indentation level of the outermost 'async def'. */
int async_def_nl; /* =1 if the outermost 'async def' had at least one
NEWLINE token after it. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
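/* Editor's sketch (not part of the original header): the tokenizer is
   driven exactly the way parsetok.c drives it --

       struct tok_state *tok = PyTokenizer_FromString("x = 1\n", 1);
       char *a, *b;
       int type;
       while ((type = PyTokenizer_Get(tok, &a, &b)) != ENDMARKER) {
           if (type == ERRORTOKEN)
               break;              (tok->done holds the E_* code)
           ... token text is the half-open range [a, b) ...
       }
       PyTokenizer_Free(tok);
*/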
#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */

2
third_party/python/Parser/tokenizer_pgen.c vendored Normal file
View file

@ -0,0 +1,2 @@
#define PGEN
#include "tokenizer.c"