python-3.6.zip added from GitHub

README.cosmo contains the necessary links.
ahgamut 2021-08-08 09:38:33 +05:30 committed by Justine Tunney
parent 75fc601ff5
commit 0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions

third_party/python/Parser/Python.asdl vendored Normal file

@@ -0,0 +1,132 @@
-- ASDL's 7 builtin types are:
-- identifier, int, string, bytes, object, singleton, constant
--
-- singleton: None, True or False
-- constant can be None, whereas None means "no value" for object.
module Python
{
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| ClassDef(identifier name,
expr* bases,
keyword* keywords,
stmt* body,
expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate a simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- XXX Jython will be different
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Await(expr value)
| Yield(expr? value)
| YieldFrom(expr value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| Num(object n) -- a number as a PyObject.
| Str(string s) -- need to specify raw, unicode, etc?
| FormattedValue(expr value, int? conversion, expr? format_spec)
| JoinedStr(expr* values)
| Bytes(bytes s)
| NameConstant(singleton value)
| Ellipsis
| Constant(constant value)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, slice slice, expr_context ctx)
| Starred(expr value, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
slice = Slice(expr? lower, expr? upper, expr? step)
| ExtSlice(slice* dims)
| Index(expr value)
boolop = And | Or
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs, int is_async)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset)
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
attributes (int lineno, int col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
keyword = (identifier? arg, expr value)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
}
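
Python.asdl is the machine-readable source of CPython's AST: asdl_c.py (further down in this commit) generates the C node definitions from it. A quick way to see these constructors in the flesh on a stock CPython 3.6 is the stdlib ast module, whose classes correspond one-to-one to the constructors above:

    import ast

    # "x = 1 + 2" exercises Module, Assign, Name, BinOp, Num and Add, all
    # declared in the stmt/expr sums above.
    tree = ast.parse("x = 1 + 2")
    print(ast.dump(tree))
    # Module(body=[Assign(targets=[Name(id='x', ctx=Store())],
    #        value=BinOp(left=Num(n=1), op=Add(), right=Num(n=2)))])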

third_party/python/Parser/acceler.c vendored Normal file

@@ -0,0 +1,125 @@
/* Parser accelerator module */
/* The parser as originally conceived had disappointing performance.
This module does some precomputation that speeds up the selection
of a DFA based upon a token, turning a search through an array
into a simple indexing operation. The parser now cannot work
without the accelerators installed. Note that the accelerators
are installed dynamically when the parser is initialized; they
are not part of the static data structure written on graminit.[ch]
by the parser generator. */
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "token.h"
#include "parser.h"
/* Forward references */
static void fixdfa(grammar *, dfa *);
static void fixstate(grammar *, state *);
void
PyGrammar_AddAccelerators(grammar *g)
{
dfa *d;
int i;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fixdfa(g, d);
g->g_accel = 1;
}
void
PyGrammar_RemoveAccelerators(grammar *g)
{
dfa *d;
int i;
g->g_accel = 0;
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++) {
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
if (s->s_accel)
PyObject_FREE(s->s_accel);
s->s_accel = NULL;
}
}
}
static void
fixdfa(grammar *g, dfa *d)
{
state *s;
int j;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fixstate(g, s);
}
static void
fixstate(grammar *g, state *s)
{
arc *a;
int k;
int *accel;
int nl = g->g_ll.ll_nlabels;
s->s_accept = 0;
accel = (int *) PyObject_MALLOC(nl * sizeof(int));
if (accel == NULL) {
fprintf(stderr, "no mem to build parser accelerators\n");
exit(1);
}
for (k = 0; k < nl; k++)
accel[k] = -1;
a = s->s_arc;
for (k = s->s_narcs; --k >= 0; a++) {
int lbl = a->a_lbl;
label *l = &g->g_ll.ll_label[lbl];
int type = l->lb_type;
if (a->a_arrow >= (1 << 7)) {
printf("XXX too many states!\n");
continue;
}
if (ISNONTERMINAL(type)) {
dfa *d1 = PyGrammar_FindDFA(g, type);
int ibit;
if (type - NT_OFFSET >= (1 << 7)) {
printf("XXX too high nonterminal number!\n");
continue;
}
for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
if (testbit(d1->d_first, ibit)) {
if (accel[ibit] != -1)
printf("XXX ambiguity!\n");
accel[ibit] = a->a_arrow | (1 << 7) |
((type - NT_OFFSET) << 8);
}
}
}
else if (lbl == EMPTY)
s->s_accept = 1;
else if (lbl >= 0 && lbl < nl)
accel[lbl] = a->a_arrow;
}
while (nl > 0 && accel[nl-1] == -1)
nl--;
for (k = 0; k < nl && accel[k] == -1;)
k++;
if (k < nl) {
int i;
s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
if (s->s_accel == NULL) {
fprintf(stderr, "no mem to add parser accelerators\n");
exit(1);
}
s->s_lower = k;
s->s_upper = nl;
for (i = 0; k < nl; i++, k++)
s->s_accel[i] = accel[k];
}
PyObject_FREE(accel);
}
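
The packed entries that fixstate() builds are consumed by PyParser_AddToken() in parser.c further down. A minimal sketch of the encoding in Python rather than C (decode_accel is an illustrative name, not a CPython function; NT_OFFSET is 256 in token.h):

    def decode_accel(x):
        # An entry is -1 (no transition) or: bits 0-6 = target state,
        # bit 7 = "push a nonterminal first", bits 8+ = nonterminal index.
        if x == -1:
            return None
        arrow = x & ((1 << 7) - 1)
        if x & (1 << 7):
            return ("push", (x >> 8) + 256, arrow)   # 256 == NT_OFFSET
        return ("shift", arrow)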

third_party/python/Parser/asdl.py vendored Normal file

@@ -0,0 +1,376 @@
#-------------------------------------------------------------------------------
# Parser for ASDL [1] definition files. Reads in an ASDL description and parses
# it into an AST that describes it.
#
# The EBNF we're parsing here: Figure 1 of the paper [1]. Extended to support
# modules and attributes after a product. Words starting with Capital letters
# are terminals. Literal tokens are in "double quotes". Others are
# non-terminals. Id is either TokenId or ConstructorId.
#
# module ::= "module" Id "{" [definitions] "}"
# definitions ::= { TypeId "=" type }
# type ::= product | sum
# product ::= fields ["attributes" fields]
# fields ::= "(" { field, "," } field ")"
# field ::= TypeId ["?" | "*"] [Id]
# sum ::= constructor { "|" constructor } ["attributes" fields]
# constructor ::= ConstructorId [fields]
#
# [1] "The Zephyr Abstract Syntax Description Language" by Wang, et al. See
# http://asdl.sourceforge.net/
#-------------------------------------------------------------------------------
from collections import namedtuple
import re
__all__ = [
'builtin_types', 'parse', 'AST', 'Module', 'Type', 'Constructor',
'Field', 'Sum', 'Product', 'VisitorBase', 'Check', 'check']
# The following classes define nodes into which the ASDL description is parsed.
# Note: this is a "meta-AST". ASDL files (such as Python.asdl) describe the AST
# structure used by a programming language. But ASDL files themselves need to be
# parsed. This module parses ASDL files and uses a simple AST to represent them.
# See the EBNF at the top of the file to understand the logical connection
# between the various node types.
builtin_types = {'identifier', 'string', 'bytes', 'int', 'object', 'singleton',
'constant'}
class AST:
def __repr__(self):
raise NotImplementedError
class Module(AST):
def __init__(self, name, dfns):
self.name = name
self.dfns = dfns
self.types = {type.name: type.value for type in dfns}
def __repr__(self):
return 'Module({0.name}, {0.dfns})'.format(self)
class Type(AST):
def __init__(self, name, value):
self.name = name
self.value = value
def __repr__(self):
return 'Type({0.name}, {0.value})'.format(self)
class Constructor(AST):
def __init__(self, name, fields=None):
self.name = name
self.fields = fields or []
def __repr__(self):
return 'Constructor({0.name}, {0.fields})'.format(self)
class Field(AST):
def __init__(self, type, name=None, seq=False, opt=False):
self.type = type
self.name = name
self.seq = seq
self.opt = opt
def __repr__(self):
if self.seq:
extra = ", seq=True"
elif self.opt:
extra = ", opt=True"
else:
extra = ""
if self.name is None:
return 'Field({0.type}{1})'.format(self, extra)
else:
return 'Field({0.type}, {0.name}{1})'.format(self, extra)
class Sum(AST):
def __init__(self, types, attributes=None):
self.types = types
self.attributes = attributes or []
def __repr__(self):
if self.attributes:
return 'Sum({0.types}, {0.attributes})'.format(self)
else:
return 'Sum({0.types})'.format(self)
class Product(AST):
def __init__(self, fields, attributes=None):
self.fields = fields
self.attributes = attributes or []
def __repr__(self):
if self.attributes:
return 'Product({0.fields}, {0.attributes})'.format(self)
else:
return 'Product({0.fields})'.format(self)
# A generic visitor for the meta-AST that describes ASDL. This can be used by
# emitters. Note that this visitor does not provide a generic visit method, so a
# subclass needs to define visit methods from visitModule to as deep as the
# interesting node.
# We also define a Check visitor that makes sure the parsed ASDL is well-formed.
class VisitorBase(object):
"""Generic tree visitor for ASTs."""
def __init__(self):
self.cache = {}
def visit(self, obj, *args):
klass = obj.__class__
meth = self.cache.get(klass)
if meth is None:
methname = "visit" + klass.__name__
meth = getattr(self, methname, None)
self.cache[klass] = meth
if meth:
try:
meth(obj, *args)
except Exception as e:
print("Error visiting %r: %s" % (obj, e))
raise
class Check(VisitorBase):
"""A visitor that checks a parsed ASDL tree for correctness.
Errors are printed and accumulated.
"""
def __init__(self):
super(Check, self).__init__()
self.cons = {}
self.errors = 0
self.types = {}
def visitModule(self, mod):
for dfn in mod.dfns:
self.visit(dfn)
def visitType(self, type):
self.visit(type.value, str(type.name))
def visitSum(self, sum, name):
for t in sum.types:
self.visit(t, name)
def visitConstructor(self, cons, name):
key = str(cons.name)
conflict = self.cons.get(key)
if conflict is None:
self.cons[key] = name
else:
print('Redefinition of constructor {}'.format(key))
print('Defined in {} and {}'.format(conflict, name))
self.errors += 1
for f in cons.fields:
self.visit(f, key)
def visitField(self, field, name):
key = str(field.type)
l = self.types.setdefault(key, [])
l.append(name)
def visitProduct(self, prod, name):
for f in prod.fields:
self.visit(f, name)
def check(mod):
"""Check the parsed ASDL tree for correctness.
Return True if success. For failure, the errors are printed out and False
is returned.
"""
v = Check()
v.visit(mod)
for t in v.types:
if t not in mod.types and t not in builtin_types:
v.errors += 1
uses = ", ".join(v.types[t])
print('Undefined type {}, used in {}'.format(t, uses))
return not v.errors
# The ASDL parser itself comes next. The only interesting external interface
# here is the top-level parse function.
def parse(filename):
"""Parse ASDL from the given file and return a Module node describing it."""
with open(filename) as f:
parser = ASDLParser()
return parser.parse(f.read())
# Types for describing tokens in an ASDL specification.
class TokenKind:
"""TokenKind is provides a scope for enumerated token kinds."""
(ConstructorId, TypeId, Equals, Comma, Question, Pipe, Asterisk,
LParen, RParen, LBrace, RBrace) = range(11)
operator_table = {
'=': Equals, ',': Comma, '?': Question, '|': Pipe, '(': LParen,
')': RParen, '*': Asterisk, '{': LBrace, '}': RBrace}
Token = namedtuple('Token', 'kind value lineno')
class ASDLSyntaxError(Exception):
def __init__(self, msg, lineno=None):
self.msg = msg
self.lineno = lineno or '<unknown>'
def __str__(self):
return 'Syntax error on line {0.lineno}: {0.msg}'.format(self)
def tokenize_asdl(buf):
"""Tokenize the given buffer. Yield Token objects."""
for lineno, line in enumerate(buf.splitlines(), 1):
for m in re.finditer(r'\s*(\w+|--.*|.)', line.strip()):
c = m.group(1)
if c[0].isalpha():
# Some kind of identifier
if c[0].isupper():
yield Token(TokenKind.ConstructorId, c, lineno)
else:
yield Token(TokenKind.TypeId, c, lineno)
elif c[:2] == '--':
# Comment
break
else:
# Operators
try:
op_kind = TokenKind.operator_table[c]
except KeyError:
raise ASDLSyntaxError('Invalid operator %s' % c, lineno)
yield Token(op_kind, c, lineno)
class ASDLParser:
"""Parser for ASDL files.
Create, then call the parse method on a buffer containing ASDL.
This is a simple recursive descent parser that uses tokenize_asdl for the
lexing.
"""
def __init__(self):
self._tokenizer = None
self.cur_token = None
def parse(self, buf):
"""Parse the ASDL in the buffer and return an AST with a Module root.
"""
self._tokenizer = tokenize_asdl(buf)
self._advance()
return self._parse_module()
def _parse_module(self):
if self._at_keyword('module'):
self._advance()
else:
raise ASDLSyntaxError(
'Expected "module" (found {})'.format(self.cur_token.value),
self.cur_token.lineno)
name = self._match(self._id_kinds)
self._match(TokenKind.LBrace)
defs = self._parse_definitions()
self._match(TokenKind.RBrace)
return Module(name, defs)
def _parse_definitions(self):
defs = []
while self.cur_token.kind == TokenKind.TypeId:
typename = self._advance()
self._match(TokenKind.Equals)
type = self._parse_type()
defs.append(Type(typename, type))
return defs
def _parse_type(self):
if self.cur_token.kind == TokenKind.LParen:
# If we see a (, it's a product
return self._parse_product()
else:
# Otherwise it's a sum. Look for ConstructorId
sumlist = [Constructor(self._match(TokenKind.ConstructorId),
self._parse_optional_fields())]
while self.cur_token.kind == TokenKind.Pipe:
# More constructors
self._advance()
sumlist.append(Constructor(
self._match(TokenKind.ConstructorId),
self._parse_optional_fields()))
return Sum(sumlist, self._parse_optional_attributes())
def _parse_product(self):
return Product(self._parse_fields(), self._parse_optional_attributes())
def _parse_fields(self):
fields = []
self._match(TokenKind.LParen)
while self.cur_token.kind == TokenKind.TypeId:
typename = self._advance()
is_seq, is_opt = self._parse_optional_field_quantifier()
id = (self._advance() if self.cur_token.kind in self._id_kinds
else None)
fields.append(Field(typename, id, seq=is_seq, opt=is_opt))
if self.cur_token.kind == TokenKind.RParen:
break
elif self.cur_token.kind == TokenKind.Comma:
self._advance()
self._match(TokenKind.RParen)
return fields
def _parse_optional_fields(self):
if self.cur_token.kind == TokenKind.LParen:
return self._parse_fields()
else:
return None
def _parse_optional_attributes(self):
if self._at_keyword('attributes'):
self._advance()
return self._parse_fields()
else:
return None
def _parse_optional_field_quantifier(self):
is_seq, is_opt = False, False
if self.cur_token.kind == TokenKind.Asterisk:
is_seq = True
self._advance()
elif self.cur_token.kind == TokenKind.Question:
is_opt = True
self._advance()
return is_seq, is_opt
def _advance(self):
""" Return the value of the current token and read the next one into
self.cur_token.
"""
cur_val = None if self.cur_token is None else self.cur_token.value
try:
self.cur_token = next(self._tokenizer)
except StopIteration:
self.cur_token = None
return cur_val
_id_kinds = (TokenKind.ConstructorId, TokenKind.TypeId)
def _match(self, kind):
"""The 'match' primitive of RD parsers.
* Verifies that the current token is of the given kind (kind can
be a tuple, in which case the kind must match one of its members).
* Returns the value of the current token
* Reads in the next token
"""
if (isinstance(kind, tuple) and self.cur_token.kind in kind or
self.cur_token.kind == kind
):
value = self.cur_token.value
self._advance()
return value
else:
raise ASDLSyntaxError(
'Unmatched {} (found {})'.format(kind, self.cur_token.kind),
self.cur_token.lineno)
def _at_keyword(self, keyword):
return (self.cur_token.kind == TokenKind.TypeId and
self.cur_token.value == keyword)
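
The module's public entry point is parse(filename), but ASDLParser can be driven from a string directly, which makes for a quick smoke test. A usage sketch with a made-up one-rule description:

    buf = """module Toy
    {
        expr = Num(int n) | Neg(expr operand)
    }"""
    mod = ASDLParser().parse(buf)
    print(mod)         # Module(Toy, [Type(expr, Sum([Constructor(Num, ...), ...]))])
    print(check(mod))  # True: 'int' is builtin, 'expr' is defined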

third_party/python/Parser/asdl_c.py vendored Normal file

File diff suppressed because it is too large

third_party/python/Parser/bitset.c vendored Normal file

@@ -0,0 +1,66 @@
/* Bitset primitives used by the parser generator */
#include "pgenheaders.h"
#include "bitset.h"
bitset
newbitset(int nbits)
{
int nbytes = NBYTES(nbits);
bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
if (ss == NULL)
Py_FatalError("no mem for bitset");
ss += nbytes;
while (--nbytes >= 0)
*--ss = 0;
return ss;
}
void
delbitset(bitset ss)
{
PyObject_FREE(ss);
}
int
addbit(bitset ss, int ibit)
{
int ibyte = BIT2BYTE(ibit);
BYTE mask = BIT2MASK(ibit);
if (ss[ibyte] & mask)
return 0; /* Bit already set */
ss[ibyte] |= mask;
return 1;
}
#if 0 /* Now a macro */
int
testbit(bitset ss, int ibit)
{
return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
if (*ss1++ != *ss2++)
return 0;
return 1;
}
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
int i;
for (i = NBYTES(nbits); --i >= 0; )
*ss1++ |= *ss2++;
}
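
The BYTE, NBYTES, BIT2BYTE and BIT2MASK definitions live in bitset.h; assuming the usual 8-bit BYTE, the arithmetic behind newbitset() and addbit() amounts to this Python sketch:

    def nbytes(nbits):
        return (nbits + 7) // 8                    # NBYTES()

    def addbit(ss, ibit):
        ibyte, mask = ibit >> 3, 1 << (ibit & 7)   # BIT2BYTE(), BIT2MASK()
        if ss[ibyte] & mask:
            return 0                               # bit already set
        ss[ibyte] |= mask
        return 1

    ss = bytearray(nbytes(19))                     # newbitset(19) analogue
    addbit(ss, 3); addbit(ss, 17)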

third_party/python/Parser/firstsets.c vendored Normal file

@@ -0,0 +1,113 @@
/* Computation of FIRST sets */
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
extern int Py_DebugFlag;
/* Forward */
static void calcfirstset(grammar *, dfa *);
void
addfirstsets(grammar *g)
{
int i;
dfa *d;
if (Py_DebugFlag)
printf("Adding FIRST sets ...\n");
for (i = 0; i < g->g_ndfas; i++) {
d = &g->g_dfa[i];
if (d->d_first == NULL)
calcfirstset(g, d);
}
}
static void
calcfirstset(grammar *g, dfa *d)
{
int i, j;
state *s;
arc *a;
int nsyms;
int *sym;
int nbits;
static bitset dummy;
bitset result;
int type;
dfa *d1;
label *l0;
if (Py_DebugFlag)
printf("Calculate FIRST set for '%s'\n", d->d_name);
if (dummy == NULL)
dummy = newbitset(1);
if (d->d_first == dummy) {
fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
return;
}
if (d->d_first != NULL) {
fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
d->d_name);
}
d->d_first = dummy;
l0 = g->g_ll.ll_label;
nbits = g->g_ll.ll_nlabels;
result = newbitset(nbits);
sym = (int *)PyObject_MALLOC(sizeof(int));
if (sym == NULL)
Py_FatalError("no mem for new sym in calcfirstset");
nsyms = 1;
sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);
s = &d->d_state[d->d_initial];
for (i = 0; i < s->s_narcs; i++) {
a = &s->s_arc[i];
for (j = 0; j < nsyms; j++) {
if (sym[j] == a->a_lbl)
break;
}
if (j >= nsyms) { /* New label */
sym = (int *)PyObject_REALLOC(sym,
sizeof(int) * (nsyms + 1));
if (sym == NULL)
Py_FatalError(
"no mem to resize sym in calcfirstset");
sym[nsyms++] = a->a_lbl;
type = l0[a->a_lbl].lb_type;
if (ISNONTERMINAL(type)) {
d1 = PyGrammar_FindDFA(g, type);
if (d1->d_first == dummy) {
fprintf(stderr,
"Left-recursion below '%s'\n",
d->d_name);
}
else {
if (d1->d_first == NULL)
calcfirstset(g, d1);
mergebitset(result,
d1->d_first, nbits);
}
}
else if (ISTERMINAL(type)) {
addbit(result, a->a_lbl);
}
}
}
d->d_first = result;
if (Py_DebugFlag) {
printf("FIRST set for '%s': {", d->d_name);
for (i = 0; i < nbits; i++) {
if (testbit(result, i))
printf(" %s", PyGrammar_LabelRepr(&l0[i]));
}
printf(" }\n");
}
PyObject_FREE(sym);
}
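
calcfirstset() computes the classic FIRST relation (the set of tokens that can begin a given nonterminal), with the dummy bitset standing in while a rule is in progress so that left recursion is reported instead of looping. A minimal sketch over a toy grammar, with illustrative names (uppercase strings are terminals; the real code walks DFA arcs and merges bitsets instead):

    GRAMMAR = {"expr": [["term", "OP", "term"]],
               "term": [["CONSTANT"], ["LPAR", "expr", "RPAR"]]}
    DUMMY, first = object(), {}

    def calc_first(rule):
        if first.get(rule) is DUMMY:
            raise ValueError("left-recursion for %r" % rule)  # d_first == dummy
        if rule in first:
            return first[rule]
        first[rule] = DUMMY
        result = set()
        for alt in GRAMMAR[rule]:
            sym = alt[0]
            result |= calc_first(sym) if sym in GRAMMAR else {sym}
        first[rule] = result
        return result

    print(calc_first("expr"))   # {'CONSTANT', 'LPAR'} (set order may vary)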

third_party/python/Parser/grammar.c vendored Normal file

@@ -0,0 +1,273 @@
/* Grammar implementation */
#include "Python.h"
#include "pgenheaders.h"
#include <ctype.h>
#include "token.h"
#include "grammar.h"
extern int Py_DebugFlag;
grammar *
newgrammar(int start)
{
grammar *g;
g = (grammar *)PyObject_MALLOC(sizeof(grammar));
if (g == NULL)
Py_FatalError("no mem for new grammar");
g->g_ndfas = 0;
g->g_dfa = NULL;
g->g_start = start;
g->g_ll.ll_nlabels = 0;
g->g_ll.ll_label = NULL;
g->g_accel = 0;
return g;
}
void
freegrammar(grammar *g)
{
int i;
for (i = 0; i < g->g_ndfas; i++) {
free(g->g_dfa[i].d_name);
for (int j = 0; j < g->g_dfa[i].d_nstates; j++)
PyObject_FREE(g->g_dfa[i].d_state[j].s_arc);
PyObject_FREE(g->g_dfa[i].d_state);
}
PyObject_FREE(g->g_dfa);
for (i = 0; i < g->g_ll.ll_nlabels; i++)
free(g->g_ll.ll_label[i].lb_str);
PyObject_FREE(g->g_ll.ll_label);
PyObject_FREE(g);
}
dfa *
adddfa(grammar *g, int type, const char *name)
{
dfa *d;
g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
sizeof(dfa) * (g->g_ndfas + 1));
if (g->g_dfa == NULL)
Py_FatalError("no mem to resize dfa in adddfa");
d = &g->g_dfa[g->g_ndfas++];
d->d_type = type;
d->d_name = strdup(name);
d->d_nstates = 0;
d->d_state = NULL;
d->d_initial = -1;
d->d_first = NULL;
return d; /* Only use while fresh! */
}
int
addstate(dfa *d)
{
state *s;
d->d_state = (state *)PyObject_REALLOC(d->d_state,
sizeof(state) * (d->d_nstates + 1));
if (d->d_state == NULL)
Py_FatalError("no mem to resize state in addstate");
s = &d->d_state[d->d_nstates++];
s->s_narcs = 0;
s->s_arc = NULL;
s->s_lower = 0;
s->s_upper = 0;
s->s_accel = NULL;
s->s_accept = 0;
return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
}
void
addarc(dfa *d, int from, int to, int lbl)
{
state *s;
arc *a;
assert(0 <= from && from < d->d_nstates);
assert(0 <= to && to < d->d_nstates);
s = &d->d_state[from];
s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));
if (s->s_arc == NULL)
Py_FatalError("no mem to resize arc list in addarc");
a = &s->s_arc[s->s_narcs++];
a->a_lbl = lbl;
a->a_arrow = to;
}
int
addlabel(labellist *ll, int type, const char *str)
{
int i;
label *lb;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type &&
strcmp(ll->ll_label[i].lb_str, str) == 0)
return i;
}
ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
sizeof(label) * (ll->ll_nlabels + 1));
if (ll->ll_label == NULL)
Py_FatalError("no mem to resize labellist in addlabel");
lb = &ll->ll_label[ll->ll_nlabels++];
lb->lb_type = type;
lb->lb_str = strdup(str);
if (Py_DebugFlag)
printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
PyGrammar_LabelRepr(lb));
return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
}
/* Same as addlabel(), but dies rather than adding */
int
findlabel(labellist *ll, int type, const char *str)
{
int i;
for (i = 0; i < ll->ll_nlabels; i++) {
if (ll->ll_label[i].lb_type == type /*&&
strcmp(ll->ll_label[i].lb_str, str) == 0*/)
return i;
}
fprintf(stderr, "Label %d/'%s' not found\n", type, str);
Py_FatalError("grammar.c:findlabel()");
/* Py_FatalError() is declared with __attribute__((__noreturn__)).
GCC emits a warning without "return 0;" (compiler bug!), but Clang is
smarter and emits a warning on the return... */
#ifndef __clang__
return 0; /* Make gcc -Wall happy */
#endif
}
/* Forward */
static void translabel(grammar *, label *);
void
translatelabels(grammar *g)
{
int i;
#ifdef Py_DEBUG
printf("Translating labels ...\n");
#endif
/* Don't translate EMPTY */
for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
translabel(g, &g->g_ll.ll_label[i]);
}
static void
translabel(grammar *g, label *lb)
{
int i;
if (Py_DebugFlag)
printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
if (lb->lb_type == NAME) {
for (i = 0; i < g->g_ndfas; i++) {
if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
if (Py_DebugFlag)
printf(
"Label %s is non-terminal %d.\n",
lb->lb_str,
g->g_dfa[i].d_type);
lb->lb_type = g->g_dfa[i].d_type;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
for (i = 0; i < (int)N_TOKENS; i++) {
if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
if (Py_DebugFlag)
printf("Label %s is terminal %d.\n",
lb->lb_str, i);
lb->lb_type = i;
free(lb->lb_str);
lb->lb_str = NULL;
return;
}
}
printf("Can't translate NAME label '%s'\n", lb->lb_str);
return;
}
if (lb->lb_type == STRING) {
if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
lb->lb_str[1] == '_') {
char *p;
char *src;
char *dest;
size_t name_len;
if (Py_DebugFlag)
printf("Label %s is a keyword\n", lb->lb_str);
lb->lb_type = NAME;
src = lb->lb_str + 1;
p = strchr(src, '\'');
if (p)
name_len = p - src;
else
name_len = strlen(src);
dest = (char *)malloc(name_len + 1);
if (!dest) {
printf("Can't alloc dest '%s'\n", src);
return;
}
strncpy(dest, src, name_len);
dest[name_len] = '\0';
free(lb->lb_str);
lb->lb_str = dest;
}
else if (lb->lb_str[2] == lb->lb_str[0]) {
int type = (int) PyToken_OneChar(lb->lb_str[1]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
int type = (int) PyToken_TwoChars(lb->lb_str[1],
lb->lb_str[2]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
int type = (int) PyToken_ThreeChars(lb->lb_str[1],
lb->lb_str[2],
lb->lb_str[3]);
if (type != OP) {
lb->lb_type = type;
free(lb->lb_str);
lb->lb_str = NULL;
}
else
printf("Unknown OP label %s\n",
lb->lb_str);
}
else
printf("Can't translate STRING label %s\n",
lb->lb_str);
}
else
printf("Can't translate label '%s'\n",
PyGrammar_LabelRepr(lb));
}
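
Most of translabel() is devoted to quoted strings from the Grammar file. A sketch of the classification it performs (label_kind is an illustrative name; the C code resolves operators through PyToken_OneChar/TwoChars/ThreeChars, which this sketch only alludes to):

    def label_kind(s):                   # s is e.g. "'while'" or "'<='"
        body = s.strip("'")
        if body[0].isalpha() or body[0] == "_":
            return ("keyword", body)     # lb_type becomes NAME
        return ("operator", body)        # length picks One/Two/ThreeChars

    print(label_kind("'while'"), label_kind("'<='"))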

third_party/python/Parser/grammar1.c vendored Normal file

@@ -0,0 +1,61 @@
/* Grammar subroutines needed by parser */
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
/* Return the DFA for the given type */
dfa *
PyGrammar_FindDFA(grammar *g, int type)
{
dfa *d;
#if 1
/* Massive speed-up */
d = &g->g_dfa[type - NT_OFFSET];
assert(d->d_type == type);
return d;
#else
/* Old, slow version */
int i;
for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {
if (d->d_type == type)
return d;
}
assert(0);
/* NOTREACHED */
#endif
}
const char *
PyGrammar_LabelRepr(label *lb)
{
static char buf[100];
if (lb->lb_type == ENDMARKER)
return "EMPTY";
else if (ISNONTERMINAL(lb->lb_type)) {
if (lb->lb_str == NULL) {
PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);
return buf;
}
else
return lb->lb_str;
}
else if (lb->lb_type < N_TOKENS) {
if (lb->lb_str == NULL)
return _PyParser_TokenNames[lb->lb_type];
else {
PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",
_PyParser_TokenNames[lb->lb_type], lb->lb_str);
return buf;
}
}
else {
Py_FatalError("invalid label");
return NULL;
}
}

third_party/python/Parser/listnode.c vendored Normal file

@@ -0,0 +1,66 @@
/* List a node on a file */
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
/* Forward */
static void list1node(FILE *, node *);
static void listnode(FILE *, node *);
void
PyNode_ListTree(node *n)
{
listnode(stdout, n);
}
static int level, atbol;
static void
listnode(FILE *fp, node *n)
{
level = 0;
atbol = 1;
list1node(fp, n);
}
static void
list1node(FILE *fp, node *n)
{
if (n == 0)
return;
if (ISNONTERMINAL(TYPE(n))) {
int i;
for (i = 0; i < NCH(n); i++)
list1node(fp, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
switch (TYPE(n)) {
case INDENT:
++level;
break;
case DEDENT:
--level;
break;
default:
if (atbol) {
int i;
for (i = 0; i < level; ++i)
fprintf(fp, "\t");
atbol = 0;
}
if (TYPE(n) == NEWLINE) {
if (STR(n) != NULL)
fprintf(fp, "%s", STR(n));
fprintf(fp, "\n");
atbol = 1;
}
else
fprintf(fp, "%s ", STR(n));
break;
}
}
else
fprintf(fp, "? ");
}

third_party/python/Parser/metagrammar.c vendored Normal file

@@ -0,0 +1,159 @@
#include "pgenheaders.h"
#include "metagrammar.h"
#include "grammar.h"
#include "pgen.h"
static arc arcs_0_0[3] = {
{2, 0},
{3, 0},
{4, 1},
};
static arc arcs_0_1[1] = {
{0, 1},
};
static state states_0[2] = {
{3, arcs_0_0},
{1, arcs_0_1},
};
static arc arcs_1_0[1] = {
{5, 1},
};
static arc arcs_1_1[1] = {
{6, 2},
};
static arc arcs_1_2[1] = {
{7, 3},
};
static arc arcs_1_3[1] = {
{3, 4},
};
static arc arcs_1_4[1] = {
{0, 4},
};
static state states_1[5] = {
{1, arcs_1_0},
{1, arcs_1_1},
{1, arcs_1_2},
{1, arcs_1_3},
{1, arcs_1_4},
};
static arc arcs_2_0[1] = {
{8, 1},
};
static arc arcs_2_1[2] = {
{9, 0},
{0, 1},
};
static state states_2[2] = {
{1, arcs_2_0},
{2, arcs_2_1},
};
static arc arcs_3_0[1] = {
{10, 1},
};
static arc arcs_3_1[2] = {
{10, 1},
{0, 1},
};
static state states_3[2] = {
{1, arcs_3_0},
{2, arcs_3_1},
};
static arc arcs_4_0[2] = {
{11, 1},
{13, 2},
};
static arc arcs_4_1[1] = {
{7, 3},
};
static arc arcs_4_2[3] = {
{14, 4},
{15, 4},
{0, 2},
};
static arc arcs_4_3[1] = {
{12, 4},
};
static arc arcs_4_4[1] = {
{0, 4},
};
static state states_4[5] = {
{2, arcs_4_0},
{1, arcs_4_1},
{3, arcs_4_2},
{1, arcs_4_3},
{1, arcs_4_4},
};
static arc arcs_5_0[3] = {
{5, 1},
{16, 1},
{17, 2},
};
static arc arcs_5_1[1] = {
{0, 1},
};
static arc arcs_5_2[1] = {
{7, 3},
};
static arc arcs_5_3[1] = {
{18, 1},
};
static state states_5[4] = {
{3, arcs_5_0},
{1, arcs_5_1},
{1, arcs_5_2},
{1, arcs_5_3},
};
static dfa dfas[6] = {
{256, "MSTART", 0, 2, states_0,
"\070\000\000"},
{257, "RULE", 0, 5, states_1,
"\040\000\000"},
{258, "RHS", 0, 2, states_2,
"\040\010\003"},
{259, "ALT", 0, 2, states_3,
"\040\010\003"},
{260, "ITEM", 0, 5, states_4,
"\040\010\003"},
{261, "ATOM", 0, 4, states_5,
"\040\000\003"},
};
static label labels[19] = {
{0, "EMPTY"},
{256, 0},
{257, 0},
{4, 0},
{0, 0},
{1, 0},
{11, 0},
{258, 0},
{259, 0},
{18, 0},
{260, 0},
{9, 0},
{10, 0},
{261, 0},
{16, 0},
{14, 0},
{3, 0},
{7, 0},
{8, 0},
};
static grammar _PyParser_Grammar = {
6,
dfas,
{19, labels},
256
};
grammar *
meta_grammar(void)
{
return &_PyParser_Grammar;
}
grammar *
Py_meta_grammar(void)
{
return meta_grammar();
}
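
These tables are a hand-inlined instance of the structures from grammar.h: each arc pair is {label index, target state}, label index 0 is EMPTY (accept), and the octal strings in dfas[] are FIRST-set bitmaps. A sketch of reading states_2 (the RHS rule) as data; accepts() is illustrative, not a CPython function:

    # states_2 above: state 0 takes ALT (label 8) to state 1; state 1 loops
    # back on '|' (label 9, whose type 18 is VBAR) and accepts on EMPTY.
    states_2 = [[(8, 1)],
                [(9, 0), (0, 1)]]

    def accepts(states, labels):
        s = 0
        for lbl in labels:
            for a_lbl, a_arrow in states[s]:
                if a_lbl == lbl:
                    s = a_arrow
                    break
            else:
                return False
        return any(a_lbl == 0 for a_lbl, _ in states[s])

    print(accepts(states_2, [8, 9, 8]))   # ALT '|' ALT -> True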

third_party/python/Parser/myreadline.c vendored Normal file

@@ -0,0 +1,399 @@
/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c.
By default, or when stdin is not a tty device, we have a super
simple my_readline function using fgets.
Optionally, we can use the GNU readline library.
my_readline() has a different return value from GNU readline():
- NULL if an interrupt occurred or if an error occurred
- a malloc'ed empty string if EOF was read
- a malloc'ed string ending in \n normally
*/
#include "Python.h"
#ifdef MS_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include "windows.h"
#endif /* MS_WINDOWS */
PyThreadState* _PyOS_ReadlineTState;
#ifdef WITH_THREAD
#include "pythread.h"
static PyThread_type_lock _PyOS_ReadlineLock = NULL;
#endif
int (*PyOS_InputHook)(void) = NULL;
/* This function restarts fgets() after an EINTR error occurs,
unless PyOS_InterruptOccurred() returns true. */
static int
my_fgets(char *buf, int len, FILE *fp)
{
#ifdef MS_WINDOWS
HANDLE hInterruptEvent;
#endif
char *p;
int err;
while (1) {
if (PyOS_InputHook != NULL)
(void)(PyOS_InputHook)();
errno = 0;
clearerr(fp);
p = fgets(buf, len, fp);
if (p != NULL)
return 0; /* No error */
err = errno;
#ifdef MS_WINDOWS
/* Ctrl-C anywhere on the line or Ctrl-Z if the only character
on a line will set ERROR_OPERATION_ABORTED. Under normal
circumstances Ctrl-C will also have caused the SIGINT handler
to fire which will have set the event object returned by
_PyOS_SigintEvent. This signal fires in another thread and
is not guaranteed to have occurred before this point in the
code.
Therefore: check whether the event is set with a small timeout.
If it is, assume this is a Ctrl-C and reset the event. If it
isn't set assume that this is a Ctrl-Z on its own and drop
through to check for EOF.
*/
if (GetLastError()==ERROR_OPERATION_ABORTED) {
hInterruptEvent = _PyOS_SigintEvent();
switch (WaitForSingleObjectEx(hInterruptEvent, 10, FALSE)) {
case WAIT_OBJECT_0:
ResetEvent(hInterruptEvent);
return 1; /* Interrupt */
case WAIT_FAILED:
return -2; /* Error */
}
}
#endif /* MS_WINDOWS */
if (feof(fp)) {
clearerr(fp);
return -1; /* EOF */
}
#ifdef EINTR
if (err == EINTR) {
int s;
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
return 1;
/* try again */
continue;
}
#endif
if (PyOS_InterruptOccurred()) {
return 1; /* Interrupt */
}
return -2; /* Error */
}
/* NOTREACHED */
}
#ifdef MS_WINDOWS
/* Readline implementation using ReadConsoleW */
extern char _get_console_type(HANDLE handle);
char *
_PyOS_WindowsConsoleReadline(HANDLE hStdIn)
{
static wchar_t wbuf_local[1024 * 16];
const DWORD chunk_size = 1024;
DWORD n_read, total_read, wbuflen, u8len;
wchar_t *wbuf;
char *buf = NULL;
int err = 0;
n_read = (DWORD)-1;
total_read = 0;
wbuf = wbuf_local;
wbuflen = sizeof(wbuf_local) / sizeof(wbuf_local[0]) - 1;
while (1) {
if (PyOS_InputHook != NULL) {
(void)(PyOS_InputHook)();
}
if (!ReadConsoleW(hStdIn, &wbuf[total_read], wbuflen - total_read, &n_read, NULL)) {
err = GetLastError();
goto exit;
}
if (n_read == (DWORD)-1 && (err = GetLastError()) == ERROR_OPERATION_ABORTED) {
break;
}
if (n_read == 0) {
int s;
err = GetLastError();
if (err != ERROR_OPERATION_ABORTED)
goto exit;
err = 0;
HANDLE hInterruptEvent = _PyOS_SigintEvent();
if (WaitForSingleObjectEx(hInterruptEvent, 100, FALSE)
== WAIT_OBJECT_0) {
ResetEvent(hInterruptEvent);
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
s = PyErr_CheckSignals();
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
if (s < 0)
goto exit;
}
break;
}
total_read += n_read;
if (total_read == 0 || wbuf[total_read - 1] == L'\n') {
break;
}
wbuflen += chunk_size;
if (wbuf == wbuf_local) {
wbuf[total_read] = '\0';
wbuf = (wchar_t*)PyMem_RawMalloc(wbuflen * sizeof(wchar_t));
if (wbuf)
wcscpy_s(wbuf, wbuflen, wbuf_local);
else {
PyErr_NoMemory();
goto exit;
}
}
else {
wchar_t *tmp = PyMem_RawRealloc(wbuf, wbuflen * sizeof(wchar_t));
if (tmp == NULL) {
PyErr_NoMemory();
goto exit;
}
wbuf = tmp;
}
}
if (wbuf[0] == '\x1a') {
buf = PyMem_RawMalloc(1);
if (buf)
buf[0] = '\0';
else {
PyErr_NoMemory();
}
goto exit;
}
u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, NULL, 0, NULL, NULL);
buf = PyMem_RawMalloc(u8len + 1);
if (buf == NULL) {
PyErr_NoMemory();
goto exit;
}
u8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, total_read, buf, u8len, NULL, NULL);
buf[u8len] = '\0';
exit:
if (wbuf != wbuf_local)
PyMem_RawFree(wbuf);
if (err) {
#ifdef WITH_THREAD
PyEval_RestoreThread(_PyOS_ReadlineTState);
#endif
PyErr_SetFromWindowsErr(err);
#ifdef WITH_THREAD
PyEval_SaveThread();
#endif
}
return buf;
}
#endif
/* Readline implementation using fgets() */
char *
PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
size_t n;
char *p, *pr;
#ifdef MS_WINDOWS
if (!Py_LegacyWindowsStdioFlag && sys_stdin == stdin) {
HANDLE hStdIn, hStdErr;
_Py_BEGIN_SUPPRESS_IPH
hStdIn = (HANDLE)_get_osfhandle(fileno(sys_stdin));
hStdErr = (HANDLE)_get_osfhandle(fileno(stderr));
_Py_END_SUPPRESS_IPH
if (_get_console_type(hStdIn) == 'r') {
fflush(sys_stdout);
if (prompt) {
if (_get_console_type(hStdErr) == 'w') {
wchar_t *wbuf;
int wlen;
wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1,
NULL, 0);
if (wlen) {
wbuf = PyMem_RawMalloc(wlen * sizeof(wchar_t));
if (wbuf == NULL) {
PyErr_NoMemory();
return NULL;
}
wlen = MultiByteToWideChar(CP_UTF8, 0, prompt, -1,
wbuf, wlen);
if (wlen) {
DWORD n;
fflush(stderr);
/* wlen includes null terminator, so subtract 1 */
WriteConsoleW(hStdErr, wbuf, wlen - 1, &n, NULL);
}
PyMem_RawFree(wbuf);
}
} else {
fprintf(stderr, "%s", prompt);
fflush(stderr);
}
}
clearerr(sys_stdin);
return _PyOS_WindowsConsoleReadline(hStdIn);
}
}
#endif
n = 100;
p = (char *)PyMem_RawMalloc(n);
if (p == NULL) {
PyErr_NoMemory();
return NULL;
}
fflush(sys_stdout);
if (prompt)
fprintf(stderr, "%s", prompt);
fflush(stderr);
switch (my_fgets(p, (int)n, sys_stdin)) {
case 0: /* Normal case */
break;
case 1: /* Interrupt */
PyMem_RawFree(p);
return NULL;
case -1: /* EOF */
case -2: /* Error */
default: /* Shouldn't happen */
*p = '\0';
break;
}
n = strlen(p);
while (n > 0 && p[n-1] != '\n') {
size_t incr = n+2;
if (incr > INT_MAX) {
PyMem_RawFree(p);
PyErr_SetString(PyExc_OverflowError, "input line too long");
return NULL;
}
pr = (char *)PyMem_RawRealloc(p, n + incr);
if (pr == NULL) {
PyMem_RawFree(p);
PyErr_NoMemory();
return NULL;
}
p = pr;
if (my_fgets(p+n, (int)incr, sys_stdin) != 0)
break;
n += strlen(p+n);
}
pr = (char *)PyMem_RawRealloc(p, n+1);
if (pr == NULL) {
PyMem_RawFree(p);
PyErr_NoMemory();
return NULL;
}
return pr;
}
/* By initializing this function pointer, systems embedding Python can
override the readline function.
Note: Python expects in return a buffer allocated with PyMem_Malloc. */
char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *);
/* Interface used by tokenizer.c and bltinmodule.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
char *rv, *res;
size_t len;
if (_PyOS_ReadlineTState == PyThreadState_GET()) {
PyErr_SetString(PyExc_RuntimeError,
"can't re-enter readline");
return NULL;
}
if (PyOS_ReadlineFunctionPointer == NULL) {
PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;
}
#ifdef WITH_THREAD
if (_PyOS_ReadlineLock == NULL) {
_PyOS_ReadlineLock = PyThread_allocate_lock();
if (_PyOS_ReadlineLock == NULL) {
PyErr_SetString(PyExc_MemoryError, "can't allocate lock");
return NULL;
}
}
#endif
_PyOS_ReadlineTState = PyThreadState_GET();
Py_BEGIN_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_acquire_lock(_PyOS_ReadlineLock, 1);
#endif
/* This is needed to handle the unlikely case that the
* interpreter is in interactive mode *and* stdin/out are not
* a tty. This can happen, for example if python is run like
* this: python -i < test1.py
*/
if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))
rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);
else
rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,
prompt);
Py_END_ALLOW_THREADS
#ifdef WITH_THREAD
PyThread_release_lock(_PyOS_ReadlineLock);
#endif
_PyOS_ReadlineTState = NULL;
if (rv == NULL)
return NULL;
len = strlen(rv) + 1;
res = PyMem_Malloc(len);
if (res != NULL) {
memcpy(res, rv, len);
}
else {
PyErr_NoMemory();
}
PyMem_RawFree(rv);
return res;
}

third_party/python/Parser/node.c vendored Normal file

@@ -0,0 +1,164 @@
/* Parse tree node implementation */
#include "Python.h"
#include "node.h"
#include "errcode.h"
node *
PyNode_New(int type)
{
node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
if (n == NULL)
return NULL;
n->n_type = type;
n->n_str = NULL;
n->n_lineno = 0;
n->n_nchildren = 0;
n->n_child = NULL;
return n;
}
/* See comments at XXXROUNDUP below. Returns -1 on overflow. */
static int
fancy_roundup(int n)
{
/* Round up to the closest power of 2 >= n. */
int result = 256;
assert(n > 128);
while (result < n) {
result <<= 1;
if (result <= 0)
return -1;
}
return result;
}
/* A gimmick to make massive numbers of reallocs quicker. The result is
* a number >= the input. In PyNode_AddChild, it's used like so, when
* we're about to add child number current_size + 1:
*
* if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1):
* allocate space for XXXROUNDUP(current_size + 1) total children
* else:
* we already have enough space
*
* Since a node starts out empty, we must have
*
* XXXROUNDUP(0) < XXXROUNDUP(1)
*
* so that we allocate space for the first child. One-child nodes are very
* common (presumably that would change if we used a more abstract form
* of syntax tree), so to avoid wasting memory it's desirable that
* XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0.
*
* Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4?
* Rounding up to a multiple of an exact power of 2 is very efficient, and
* most nodes with more than one child have <= 4 kids.
*
* Else we call fancy_roundup() to grow proportionately to n. We've got an
* extreme case then (like test_longexp.py), and on many platforms doing
* anything less than proportional growth leads to exorbitant runtime
* (e.g., MacPython), or extreme fragmentation of user address space (e.g.,
* Win98).
*
* In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre
* reported that, with this scheme, 89% of PyObject_REALLOC calls in
* PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually
* wastes very little memory, but is very effective at sidestepping
* platform-realloc disasters on vulnerable platforms.
*
* Note that this would be straightforward if a node stored its current
* capacity. The code is tricky to avoid that.
*/
#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \
(n) <= 128 ? (int)_Py_SIZE_ROUND_UP((n), 4) : \
fancy_roundup(n))
int
PyNode_AddChild(node *n1, int type, char *str, int lineno, int col_offset)
{
const int nch = n1->n_nchildren;
int current_capacity;
int required_capacity;
node *n;
if (nch == INT_MAX || nch < 0)
return E_OVERFLOW;
current_capacity = XXXROUNDUP(nch);
required_capacity = XXXROUNDUP(nch + 1);
if (current_capacity < 0 || required_capacity < 0)
return E_OVERFLOW;
if (current_capacity < required_capacity) {
if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) {
return E_NOMEM;
}
n = n1->n_child;
n = (node *) PyObject_REALLOC(n,
required_capacity * sizeof(node));
if (n == NULL)
return E_NOMEM;
n1->n_child = n;
}
n = &n1->n_child[n1->n_nchildren++];
n->n_type = type;
n->n_str = str;
n->n_lineno = lineno;
n->n_col_offset = col_offset;
n->n_nchildren = 0;
n->n_child = NULL;
return 0;
}
/* Forward */
static void freechildren(node *);
static Py_ssize_t sizeofchildren(node *n);
void
PyNode_Free(node *n)
{
if (n != NULL) {
freechildren(n);
PyObject_FREE(n);
}
}
Py_ssize_t
_PyNode_SizeOf(node *n)
{
Py_ssize_t res = 0;
if (n != NULL)
res = sizeof(node) + sizeofchildren(n);
return res;
}
static void
freechildren(node *n)
{
int i;
for (i = NCH(n); --i >= 0; )
freechildren(CHILD(n, i));
if (n->n_child != NULL)
PyObject_FREE(n->n_child);
if (STR(n) != NULL)
PyObject_FREE(STR(n));
}
static Py_ssize_t
sizeofchildren(node *n)
{
Py_ssize_t res = 0;
int i;
for (i = NCH(n); --i >= 0; )
res += sizeofchildren(CHILD(n, i));
if (n->n_child != NULL)
/* allocated size of n->n_child array */
res += XXXROUNDUP(NCH(n)) * sizeof(node);
if (STR(n) != NULL)
res += strlen(STR(n)) + 1;
return res;
}
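
The growth policy described in the big comment above, restated as runnable Python with the same thresholds (xxxroundup is an illustrative name):

    def xxxroundup(n):
        if n <= 1:
            return n                # XXXROUNDUP(0) == 0, XXXROUNDUP(1) == 1
        if n <= 128:
            return (n + 3) & ~3     # _Py_SIZE_ROUND_UP(n, 4)
        result = 256                # fancy_roundup(): power of 2 >= n
        while result < n:
            result <<= 1
        return result

    assert [xxxroundup(n) for n in (0, 1, 2, 5, 128, 129)] \
        == [0, 1, 4, 8, 128, 256]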

third_party/python/Parser/parser.c vendored Normal file

@@ -0,0 +1,447 @@
/* Parser implementation */
/* For a description, see the comments at end of this file */
/* XXX To do: error recovery */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "grammar.h"
#include "node.h"
#include "parser.h"
#include "errcode.h"
#ifdef Py_DEBUG
extern int Py_DebugFlag;
#define D(x) if (!Py_DebugFlag); else x
#else
#define D(x)
#endif
/* STACK DATA TYPE */
static void s_reset(stack *);
static void
s_reset(stack *s)
{
s->s_top = &s->s_base[MAXSTACK];
}
#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
static int
s_push(stack *s, dfa *d, node *parent)
{
stackentry *top;
if (s->s_top == s->s_base) {
fprintf(stderr, "s_push: parser stack overflow\n");
return E_NOMEM;
}
top = --s->s_top;
top->s_dfa = d;
top->s_parent = parent;
top->s_state = 0;
return 0;
}
#ifdef Py_DEBUG
static void
s_pop(stack *s)
{
if (s_empty(s))
Py_FatalError("s_pop: parser stack underflow -- FATAL");
s->s_top++;
}
#else /* !Py_DEBUG */
#define s_pop(s) (s)->s_top++
#endif
/* PARSER CREATION */
parser_state *
PyParser_New(grammar *g, int start)
{
parser_state *ps;
if (!g->g_accel)
PyGrammar_AddAccelerators(g);
ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
if (ps == NULL)
return NULL;
ps->p_grammar = g;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
ps->p_flags = 0;
#endif
ps->p_tree = PyNode_New(start);
if (ps->p_tree == NULL) {
PyMem_FREE(ps);
return NULL;
}
s_reset(&ps->p_stack);
(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
return ps;
}
void
PyParser_Delete(parser_state *ps)
{
/* NB If you want to save the parse tree,
you must set p_tree to NULL before calling delparser! */
PyNode_Free(ps->p_tree);
PyMem_FREE(ps);
}
/* PARSER STACK OPERATIONS */
static int
shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset)
{
int err;
assert(!s_empty(s));
err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return 0;
}
static int
push(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
{
int err;
node *n;
n = s->s_top->s_parent;
assert(!s_empty(s));
err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
if (err)
return err;
s->s_top->s_state = newstate;
return s_push(s, d, CHILD(n, NCH(n)-1));
}
/* PARSER PROPER */
static int
classify(parser_state *ps, int type, const char *str)
{
grammar *g = ps->p_grammar;
int n = g->g_ll.ll_nlabels;
if (type == NAME) {
label *l = g->g_ll.ll_label;
int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type != NAME || l->lb_str == NULL ||
l->lb_str[0] != str[0] ||
strcmp(l->lb_str, str) != 0)
continue;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Leaving this in as an example */
if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
if (str[0] == 'w' && strcmp(str, "with") == 0)
break; /* not a keyword yet */
else if (str[0] == 'a' && strcmp(str, "as") == 0)
break; /* not a keyword yet */
}
#endif
#endif
D(printf("It's a keyword\n"));
return n - i;
}
}
{
label *l = g->g_ll.ll_label;
int i;
for (i = n; i > 0; i--, l++) {
if (l->lb_type == type && l->lb_str == NULL) {
D(printf("It's a token we know\n"));
return n - i;
}
}
}
D(printf("Illegal token\n"));
return -1;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Leaving this in as an example */
static void
future_hack(parser_state *ps)
{
node *n = ps->p_stack.s_top->s_parent;
node *ch, *cch;
int i;
/* from __future__ import ..., must have at least 4 children */
n = CHILD(n, 0);
if (NCH(n) < 4)
return;
ch = CHILD(n, 0);
if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
return;
ch = CHILD(n, 1);
if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
return;
ch = CHILD(n, 3);
/* ch can be a star, a parenthesis or import_as_names */
if (TYPE(ch) == STAR)
return;
if (TYPE(ch) == LPAR)
ch = CHILD(n, 4);
for (i = 0; i < NCH(ch); i += 2) {
cch = CHILD(ch, i);
if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
char *str_ch = STR(CHILD(cch, 0));
if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
}
}
}
}
#endif
#endif /* future keyword */
int
PyParser_AddToken(parser_state *ps, int type, char *str,
int lineno, int col_offset, int *expected_ret)
{
int ilabel;
int err;
D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
/* Find out which label this token is */
ilabel = classify(ps, type, str);
if (ilabel < 0)
return E_SYNTAX;
/* Loop until the token is shifted or an error occurred */
for (;;) {
/* Fetch the current dfa and state */
dfa *d = ps->p_stack.s_top->s_dfa;
state *s = &d->d_state[ps->p_stack.s_top->s_state];
D(printf(" DFA '%s', state %d:",
d->d_name, ps->p_stack.s_top->s_state));
/* Check accelerator */
if (s->s_lower <= ilabel && ilabel < s->s_upper) {
int x = s->s_accel[ilabel - s->s_lower];
if (x != -1) {
if (x & (1<<7)) {
/* Push non-terminal */
int nt = (x >> 8) + NT_OFFSET;
int arrow = x & ((1<<7)-1);
dfa *d1 = PyGrammar_FindDFA(
ps->p_grammar, nt);
if ((err = push(&ps->p_stack, nt, d1,
arrow, lineno, col_offset)) > 0) {
D(printf(" MemError: push\n"));
return err;
}
D(printf(" Push ...\n"));
continue;
}
/* Shift the token */
if ((err = shift(&ps->p_stack, type, str,
x, lineno, col_offset)) > 0) {
D(printf(" MemError: shift.\n"));
return err;
}
D(printf(" Shift.\n"));
/* Pop while we are in an accept-only state */
while (s = &d->d_state
[ps->p_stack.s_top->s_state],
s->s_accept && s->s_narcs == 1) {
D(printf(" DFA '%s', state %d: "
"Direct pop.\n",
d->d_name,
ps->p_stack.s_top->s_state));
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
if (d->d_name[0] == 'i' &&
strcmp(d->d_name,
"import_stmt") == 0)
future_hack(ps);
#endif
#endif
s_pop(&ps->p_stack);
if (s_empty(&ps->p_stack)) {
D(printf(" ACCEPT.\n"));
return E_DONE;
}
d = ps->p_stack.s_top->s_dfa;
}
return E_OK;
}
}
if (s->s_accept) {
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
if (d->d_name[0] == 'i' &&
strcmp(d->d_name, "import_stmt") == 0)
future_hack(ps);
#endif
#endif
/* Pop this dfa and try again */
s_pop(&ps->p_stack);
D(printf(" Pop ...\n"));
if (s_empty(&ps->p_stack)) {
D(printf(" Error: bottom of stack.\n"));
return E_SYNTAX;
}
continue;
}
/* Stuck, report syntax error */
D(printf(" Error.\n"));
if (expected_ret) {
if (s->s_lower == s->s_upper - 1) {
/* Only one possible expected token */
*expected_ret = ps->p_grammar->
g_ll.ll_label[s->s_lower].lb_type;
}
else
*expected_ret = -1;
}
return E_SYNTAX;
}
}
#ifdef Py_DEBUG
/* DEBUG OUTPUT */
void
dumptree(grammar *g, node *n)
{
int i;
if (n == NULL)
printf("NIL");
else {
label l;
l.lb_type = TYPE(n);
l.lb_str = STR(n);
printf("%s", PyGrammar_LabelRepr(&l));
if (ISNONTERMINAL(TYPE(n))) {
printf("(");
for (i = 0; i < NCH(n); i++) {
if (i > 0)
printf(",");
dumptree(g, CHILD(n, i));
}
printf(")");
}
}
}
void
showtree(grammar *g, node *n)
{
int i;
if (n == NULL)
return;
if (ISNONTERMINAL(TYPE(n))) {
for (i = 0; i < NCH(n); i++)
showtree(g, CHILD(n, i));
}
else if (ISTERMINAL(TYPE(n))) {
printf("%s", _PyParser_TokenNames[TYPE(n)]);
if (TYPE(n) == NUMBER || TYPE(n) == NAME)
printf("(%s)", STR(n));
printf(" ");
}
else
printf("? ");
}
void
printtree(parser_state *ps)
{
if (Py_DebugFlag) {
printf("Parse tree:\n");
dumptree(ps->p_grammar, ps->p_tree);
printf("\n");
printf("Tokens:\n");
showtree(ps->p_grammar, ps->p_tree);
printf("\n");
}
printf("Listing:\n");
PyNode_ListTree(ps->p_tree);
printf("\n");
}
#endif /* Py_DEBUG */
/*
Description
-----------
The parser's interface is different from the usual: the function addtoken()
must be called for each token in the input. This makes it possible to
turn it into an incremental parsing system later. The parsing system
constructs a parse tree as it goes.
A parsing rule is represented as a Deterministic Finite-state Automaton
(DFA). A node in a DFA represents a state of the parser; an arc represents
a transition. Transitions are either labeled with terminal symbols or
with non-terminals. When the parser decides to follow an arc labeled
with a non-terminal, it is invoked recursively with the DFA representing
the parsing rule for that as its initial state; when that DFA accepts,
the parser that invoked it continues. The parse tree constructed by the
recursively called parser is inserted as a child in the current parse tree.
The DFAs can be constructed automatically from a more conventional
language description. An extended LL(1) grammar (ELL(1)) is suitable.
Certain restrictions make the parser's life easier: rules that can produce
the empty string should be outlawed (there are other ways to put loops
or optional parts in the language). To avoid the need to construct
FIRST sets, we can require that all but the last alternative of a rule
(really: arc going out of a DFA's state) must begin with a terminal
symbol.
As an example, consider this grammar:
expr: term (OP term)*
term: CONSTANT | '(' expr ')'
The DFA corresponding to the rule for expr is:
------->.---term-->.------->
^ |
| |
\----OP----/
The parse tree generated for the input a+b is:
(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
*/
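
The description above compresses well into a toy model. The following sketch of the PyParser_AddToken() loop is an assumption-laden re-implementation, not the C data layout: labels are plain strings, FIRST sets are written out by hand, and an ("", state) arc marks an accepting state (s_accept). It parses CONSTANT OP CONSTANT against the example grammar:

    FIRST = {"expr": {"CONSTANT", "("}, "term": {"CONSTANT", "("}}
    DFAS = {   # nonterminal -> {state: [(label, target), ...]}
        "expr": {0: [("term", 1)], 1: [("OP", 0), ("", 1)]},
        "term": {0: [("CONSTANT", 1), ("(", 2)], 1: [("", 1)],
                 2: [("expr", 3)], 3: [(")", 1)]},
    }

    def addtoken(stack, tok):
        while True:
            dfa, state = stack[-1]
            arcs = DFAS[dfa][state]
            for label, target in arcs:
                if label == tok:                  # shift the token
                    stack[-1] = (dfa, target)
                    # pop while in an accept-only state (s_accept, one arc)
                    while stack and \
                          DFAS[stack[-1][0]][stack[-1][1]] == [("", stack[-1][1])]:
                        stack.pop()
                    return
                if label in DFAS and tok in FIRST[label]:
                    stack[-1] = (dfa, target)     # advance, then push sub-DFA
                    stack.append((label, 0))
                    break
            else:
                if ("", state) in arcs and len(stack) > 1:
                    stack.pop()                   # this DFA accepts; retry
                    continue
                raise SyntaxError("unexpected %r" % tok)

    stack = [("expr", 0)]
    for tok in ["CONSTANT", "OP", "CONSTANT"]:
        addtoken(stack, tok)
    print(("", stack[-1][1]) in DFAS[stack[-1][0]][stack[-1][1]])   # True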

third_party/python/Parser/parser.h vendored Normal file

@@ -0,0 +1,42 @@
#ifndef Py_PARSER_H
#define Py_PARSER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Parser interface */
#define MAXSTACK 1500
typedef struct {
int s_state; /* State in current DFA */
dfa *s_dfa; /* Current DFA */
struct _node *s_parent; /* Where to add next node */
} stackentry;
typedef struct {
stackentry *s_top; /* Top entry */
stackentry s_base[MAXSTACK];/* Array of stack entries */
/* NB The stack grows down */
} stack;
typedef struct {
stack p_stack; /* Stack of parser states */
grammar *p_grammar; /* Grammar to use */
node *p_tree; /* Top of parse tree */
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
unsigned long p_flags; /* see co_flags in Include/code.h */
#endif
} parser_state;
parser_state *PyParser_New(grammar *g, int start);
void PyParser_Delete(parser_state *ps);
int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset,
int *expected_ret);
void PyGrammar_AddAccelerators(grammar *g);
#ifdef __cplusplus
}
#endif
#endif /* !Py_PARSER_H */

third_party/python/Parser/parsetok.c vendored Normal file

@@ -0,0 +1,384 @@
/* Parser-tokenizer link implementation */
#include "pgenheaders.h"
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
#include "parsetok.h"
#include "errcode.h"
#include "graminit.h"
/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static int initerr(perrdetail *err_ret, PyObject * filename);
/* Parse input coming from a string. Return error code, print some errors. */
node *
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
{
return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
}
node *
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
perrdetail *err_ret, int flags)
{
return PyParser_ParseStringFlagsFilename(s, NULL,
g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
grammar *g, int start,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
err_ret, &iflags);
}
node *
PyParser_ParseStringObject(const char *s, PyObject *filename,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
int exec_input = start == file_input;
if (initerr(err_ret, filename) < 0)
return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input);
else
tok = PyTokenizer_FromString(s, exec_input);
if (tok == NULL) {
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
return NULL;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
grammar *g, int start,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *filename = NULL;
#ifndef PGEN
if (filename_str != NULL) {
filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
#endif
n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
#ifndef PGEN
Py_XDECREF(filename);
#endif
return n;
}
/* Parse input coming from a file. Return error code, print some errors. */
node *
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret)
{
return PyParser_ParseFileFlags(fp, filename, NULL,
g, start, ps1, ps2, err_ret, 0);
}
node *
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int flags)
{
int iflags = flags;
return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
ps2, err_ret, &iflags);
}
node *
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
struct tok_state *tok;
if (initerr(err_ret, filename) < 0)
return NULL;
if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM;
return NULL;
}
#ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags);
}
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
const char *enc, grammar *g, int start,
const char *ps1, const char *ps2,
perrdetail *err_ret, int *flags)
{
node *n;
PyObject *fileobj = NULL;
#ifndef PGEN
if (filename != NULL) {
fileobj = PyUnicode_DecodeFSDefault(filename);
if (fileobj == NULL) {
err_ret->error = E_ERROR;
return NULL;
}
}
#endif
n = PyParser_ParseFileObject(fp, fileobj, enc, g,
start, ps1, ps2, err_ret, flags);
#ifndef PGEN
Py_XDECREF(fileobj);
#endif
return n;
}
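/* Editor's sketch (illustrative, same assumptions as the sketch above):
   parsing a whole file in one call.  ps1/ps2 are only used for
   interactive prompting, so NULL is fine for a regular file. */
#if 0
static node *
example_parse_file(FILE *fp, const char *path)
{
    perrdetail err;
    return PyParser_ParseFile(fp, path, &_PyParser_Grammar, file_input,
                              NULL, NULL, &err);
}
#endif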
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";
static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";
static void
warn(const char *msg, const char *filename, int lineno)
{
if (filename == NULL)
filename = "<string>";
PySys_WriteStderr(msg, filename, lineno);
}
#endif
#endif
/* Parse input coming from the given tokenizer structure.
Return error code. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
int *flags)
{
parser_state *ps;
node *n;
int started = 0;
if ((ps = PyParser_New(g, start)) == NULL) {
err_ret->error = E_NOMEM;
PyTokenizer_Free(tok);
return NULL;
}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (*flags & PyPARSE_BARRY_AS_BDFL)
ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
#endif
for (;;) {
char *a, *b;
int type;
size_t len;
char *str;
int col_offset;
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
err_ret->error = tok->done;
break;
}
if (type == ENDMARKER && started) {
type = NEWLINE; /* Add an extra newline */
started = 0;
            /* Add the right number of dedent tokens,
               except if PyPARSE_DONT_IMPLY_DEDENT is given --
               codeop.py uses this. */
if (tok->indent &&
!(*flags & PyPARSE_DONT_IMPLY_DEDENT))
{
tok->pendin = -tok->indent;
tok->indent = 0;
}
}
else
started = 1;
len = (a != NULL && b != NULL) ? b - a : 0;
str = (char *) PyObject_MALLOC(len + 1);
if (str == NULL) {
err_ret->error = E_NOMEM;
break;
}
if (len > 0)
strncpy(str, a, len);
str[len] = '\0';
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
if (type == NOTEQUAL) {
if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "!=")) {
PyObject_FREE(str);
err_ret->error = E_SYNTAX;
break;
}
else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
strcmp(str, "<>")) {
PyObject_FREE(str);
err_ret->expected = NOTEQUAL;
err_ret->error = E_SYNTAX;
break;
}
}
#endif
if (a != NULL && a >= tok->line_start) {
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
intptr_t, int);
}
else {
col_offset = -1;
}
if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
tok->lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
err_ret->token = type;
}
break;
}
}
if (err_ret->error == E_DONE) {
n = ps->p_tree;
ps->p_tree = NULL;
#ifndef PGEN
/* Check that the source for a single input statement really
is a single statement by looking at what is left in the
buffer after parsing. Trailing whitespace and comments
are OK. */
if (start == single_input) {
char *cur = tok->cur;
char c = *tok->cur;
for (;;) {
while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
c = *++cur;
if (!c)
break;
if (c != '#') {
err_ret->error = E_BADSINGLE;
PyNode_Free(n);
n = NULL;
break;
}
/* Suck up comment. */
while (c && c != '\n')
c = *++cur;
}
}
#endif
}
else
n = NULL;
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
*flags = ps->p_flags;
#endif
PyParser_Delete(ps);
if (n == NULL) {
if (tok->done == E_EOF)
err_ret->error = E_EOF;
err_ret->lineno = tok->lineno;
if (tok->buf != NULL) {
size_t len;
assert(tok->cur - tok->buf < INT_MAX);
err_ret->offset = (int)(tok->cur - tok->buf);
len = tok->inp - tok->buf;
err_ret->text = (char *) PyObject_MALLOC(len + 1);
if (err_ret->text != NULL) {
if (len > 0)
strncpy(err_ret->text, tok->buf, len);
err_ret->text[len] = '\0';
}
}
} else if (tok->encoding != NULL) {
/* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
* allocated using PyMem_
*/
node* r = PyNode_New(encoding_decl);
if (r)
r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
if (!r || !r->n_str) {
err_ret->error = E_NOMEM;
if (r)
PyObject_FREE(r);
n = NULL;
goto done;
}
strcpy(r->n_str, tok->encoding);
PyMem_FREE(tok->encoding);
tok->encoding = NULL;
r->n_nchildren = 1;
r->n_child = n;
n = r;
}
done:
PyTokenizer_Free(tok);
return n;
}
static int
initerr(perrdetail *err_ret, PyObject *filename)
{
err_ret->error = E_OK;
err_ret->lineno = 0;
err_ret->offset = 0;
err_ret->text = NULL;
err_ret->token = -1;
err_ret->expected = -1;
#ifndef PGEN
if (filename) {
Py_INCREF(filename);
err_ret->filename = filename;
}
else {
err_ret->filename = PyUnicode_FromString("<string>");
if (err_ret->filename == NULL) {
err_ret->error = E_ERROR;
return -1;
}
}
#endif
return 0;
}

2
third_party/python/Parser/parsetok_pgen.c vendored Normal file
View file

@ -0,0 +1,2 @@
#define PGEN
#include "parsetok.c"

724
third_party/python/Parser/pgen.c vendored Normal file
View file

@ -0,0 +1,724 @@
/* Parser generator */
/* For a description, see the comments at end of this file */
#include "Python.h"
#include "pgenheaders.h"
#include "token.h"
#include "node.h"
#include "grammar.h"
#include "metagrammar.h"
#include "pgen.h"
extern int Py_DebugFlag;
extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
typedef struct _nfaarc {
int ar_label;
int ar_arrow;
} nfaarc;
typedef struct _nfastate {
int st_narcs;
nfaarc *st_arc;
} nfastate;
typedef struct _nfa {
int nf_type;
char *nf_name;
int nf_nstates;
nfastate *nf_state;
int nf_start, nf_finish;
} nfa;
/* Forward */
static void compile_rhs(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_alt(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_item(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static void compile_atom(labellist *ll,
nfa *nf, node *n, int *pa, int *pb);
static int
addnfastate(nfa *nf)
{
nfastate *st;
nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
sizeof(nfastate) * (nf->nf_nstates + 1));
if (nf->nf_state == NULL)
Py_FatalError("out of mem");
st = &nf->nf_state[nf->nf_nstates++];
st->st_narcs = 0;
st->st_arc = NULL;
return st - nf->nf_state;
}
static void
addnfaarc(nfa *nf, int from, int to, int lbl)
{
nfastate *st;
nfaarc *ar;
st = &nf->nf_state[from];
st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
sizeof(nfaarc) * (st->st_narcs + 1));
if (st->st_arc == NULL)
Py_FatalError("out of mem");
ar = &st->st_arc[st->st_narcs++];
ar->ar_label = lbl;
ar->ar_arrow = to;
}
static nfa *
newnfa(char *name)
{
nfa *nf;
    static int type = NT_OFFSET; /* All types will be disjoint */
nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
if (nf == NULL)
Py_FatalError("no mem for new nfa");
nf->nf_type = type++;
nf->nf_name = name; /* XXX strdup(name) ??? */
nf->nf_nstates = 0;
nf->nf_state = NULL;
nf->nf_start = nf->nf_finish = -1;
return nf;
}
typedef struct _nfagrammar {
int gr_nnfas;
nfa **gr_nfa;
labellist gr_ll;
} nfagrammar;
/* Forward */
static void compile_rule(nfagrammar *gr, node *n);
static nfagrammar *
newnfagrammar(void)
{
nfagrammar *gr;
gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
if (gr == NULL)
Py_FatalError("no mem for new nfa grammar");
gr->gr_nnfas = 0;
gr->gr_nfa = NULL;
gr->gr_ll.ll_nlabels = 0;
gr->gr_ll.ll_label = NULL;
addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
return gr;
}
static void
freenfagrammar(nfagrammar *gr)
{
for (int i = 0; i < gr->gr_nnfas; i++) {
PyObject_FREE(gr->gr_nfa[i]->nf_state);
}
PyObject_FREE(gr->gr_nfa);
PyObject_FREE(gr);
}
static nfa *
addnfa(nfagrammar *gr, char *name)
{
nfa *nf;
nf = newnfa(name);
gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
sizeof(nfa*) * (gr->gr_nnfas + 1));
if (gr->gr_nfa == NULL)
Py_FatalError("out of mem");
gr->gr_nfa[gr->gr_nnfas++] = nf;
addlabel(&gr->gr_ll, NAME, nf->nf_name);
return nf;
}
#ifdef Py_DEBUG
static const char REQNFMT[] = "metacompile: less than %d children\n";
#define REQN(i, count) do { \
if (i < count) { \
fprintf(stderr, REQNFMT, count); \
Py_FatalError("REQN"); \
} \
} while (0)
#else
#define REQN(i, count) /* empty */
#endif
static nfagrammar *
metacompile(node *n)
{
nfagrammar *gr;
int i;
if (Py_DebugFlag)
printf("Compiling (meta-) parse tree into NFA grammar\n");
gr = newnfagrammar();
REQ(n, MSTART);
i = n->n_nchildren - 1; /* Last child is ENDMARKER */
n = n->n_child;
for (; --i >= 0; n++) {
if (n->n_type != NEWLINE)
compile_rule(gr, n);
}
return gr;
}
static void
compile_rule(nfagrammar *gr, node *n)
{
nfa *nf;
REQ(n, RULE);
REQN(n->n_nchildren, 4);
n = n->n_child;
REQ(n, NAME);
nf = addnfa(gr, n->n_str);
n++;
REQ(n, COLON);
n++;
REQ(n, RHS);
compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
n++;
REQ(n, NEWLINE);
}
static void
compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, RHS);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ALT);
compile_alt(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
a = *pa;
b = *pb;
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
for (; --i >= 0; n++) {
REQ(n, VBAR);
REQN(i, 1);
--i;
n++;
REQ(n, ALT);
compile_alt(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
}
}
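/* Editor's note: for a rule "t: a | b", compile_rhs allocates fresh
   start/finish states and ties each alternative in with EMPTY arcs:

       *pa --EMPTY--> (a) --EMPTY--> *pb
       *pa --EMPTY--> (b) --EMPTY--> *pb

   i.e. the standard textbook construction for alternation. */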
static void
compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ALT);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
REQ(n, ITEM);
compile_item(ll, nf, n, pa, pb);
--i;
n++;
for (; --i >= 0; n++) {
REQ(n, ITEM);
compile_item(ll, nf, n, &a, &b);
addnfaarc(nf, *pb, a, EMPTY);
*pb = b;
}
}
static void
compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
int a, b;
REQ(n, ITEM);
i = n->n_nchildren;
REQN(i, 1);
n = n->n_child;
if (n->n_type == LSQB) {
REQN(i, 3);
n++;
REQ(n, RHS);
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, *pb, EMPTY);
compile_rhs(ll, nf, n, &a, &b);
addnfaarc(nf, *pa, a, EMPTY);
addnfaarc(nf, b, *pb, EMPTY);
REQN(i, 1);
n++;
REQ(n, RSQB);
}
else {
compile_atom(ll, nf, n, pa, pb);
if (--i <= 0)
return;
n++;
addnfaarc(nf, *pb, *pa, EMPTY);
if (n->n_type == STAR)
*pb = *pa;
else
REQ(n, PLUS);
}
}
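/* Editor's note: for an item "x*", compile_atom first builds
   *pa --x--> *pb; the EMPTY arc added above then runs from *pb back to
   *pa, and assigning *pb = *pa makes zero repetitions acceptable.  For
   "x+" the back arc is kept but the states stay distinct, so at least
   one x is required.  "[x]" instead gets a direct *pa --EMPTY--> *pb
   bypass around the bracketed rhs. */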
static void
compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
{
int i;
REQ(n, ATOM);
i = n->n_nchildren;
(void)i; /* Don't warn about set but unused */
REQN(i, 1);
n = n->n_child;
if (n->n_type == LPAR) {
REQN(i, 3);
n++;
REQ(n, RHS);
compile_rhs(ll, nf, n, pa, pb);
n++;
REQ(n, RPAR);
}
else if (n->n_type == NAME || n->n_type == STRING) {
*pa = addnfastate(nf);
*pb = addnfastate(nf);
addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
}
else
REQ(n, NAME);
}
static void
dumpstate(labellist *ll, nfa *nf, int istate)
{
nfastate *st;
int i;
nfaarc *ar;
printf("%c%2d%c",
istate == nf->nf_start ? '*' : ' ',
istate,
istate == nf->nf_finish ? '.' : ' ');
st = &nf->nf_state[istate];
ar = st->st_arc;
for (i = 0; i < st->st_narcs; i++) {
if (i > 0)
printf("\n ");
printf("-> %2d %s", ar->ar_arrow,
PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
ar++;
}
printf("\n");
}
static void
dumpnfa(labellist *ll, nfa *nf)
{
int i;
printf("NFA '%s' has %d states; start %d, finish %d\n",
nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
for (i = 0; i < nf->nf_nstates; i++)
dumpstate(ll, nf, i);
}
/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
static void
addclosure(bitset ss, nfa *nf, int istate)
{
if (addbit(ss, istate)) {
nfastate *st = &nf->nf_state[istate];
nfaarc *ar = st->st_arc;
int i;
for (i = st->st_narcs; --i >= 0; ) {
if (ar->ar_label == EMPTY)
addclosure(ss, nf, ar->ar_arrow);
ar++;
}
}
}
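/* Editor's note: addclosure computes the epsilon closure of istate,
   i.e. the set of all NFA states reachable through EMPTY arcs alone.
   The addbit() guard (true only when the bit was newly set) makes the
   recursion terminate even if the EMPTY arcs form a cycle. */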
typedef struct _ss_arc {
bitset sa_bitset;
int sa_arrow;
int sa_label;
} ss_arc;
typedef struct _ss_state {
bitset ss_ss;
int ss_narcs;
struct _ss_arc *ss_arc;
int ss_deleted;
int ss_finish;
int ss_rename;
} ss_state;
typedef struct _ss_dfa {
int sd_nstates;
ss_state *sd_state;
} ss_dfa;
/* Forward */
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, const char *msg);
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
static void
makedfa(nfagrammar *gr, nfa *nf, dfa *d)
{
int nbits = nf->nf_nstates;
bitset ss;
int xx_nstates;
ss_state *xx_state, *yy;
ss_arc *zz;
int istate, jstate, iarc, jarc, ibit;
nfastate *st;
nfaarc *ar;
ss = newbitset(nbits);
addclosure(ss, nf, nf->nf_start);
xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
if (xx_state == NULL)
Py_FatalError("no mem for xx_state in makedfa");
xx_nstates = 1;
yy = &xx_state[0];
yy->ss_ss = ss;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(ss, nf->nf_finish);
if (yy->ss_finish)
printf("Error: nonterminal '%s' may produce empty.\n",
nf->nf_name);
/* This algorithm is from a book written before
the invention of structured programming... */
/* For each unmarked state... */
for (istate = 0; istate < xx_nstates; ++istate) {
size_t size;
yy = &xx_state[istate];
ss = yy->ss_ss;
/* For all its states... */
for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
if (!testbit(ss, ibit))
continue;
st = &nf->nf_state[ibit];
/* For all non-empty arcs from this state... */
for (iarc = 0; iarc < st->st_narcs; iarc++) {
ar = &st->st_arc[iarc];
if (ar->ar_label == EMPTY)
continue;
/* Look up in list of arcs from this state */
for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
zz = &yy->ss_arc[jarc];
if (ar->ar_label == zz->sa_label)
goto found;
}
/* Add new arc for this state */
size = sizeof(ss_arc) * (yy->ss_narcs + 1);
yy->ss_arc = (ss_arc *)PyObject_REALLOC(
yy->ss_arc, size);
if (yy->ss_arc == NULL)
Py_FatalError("out of mem");
zz = &yy->ss_arc[yy->ss_narcs++];
zz->sa_label = ar->ar_label;
zz->sa_bitset = newbitset(nbits);
zz->sa_arrow = -1;
found: ;
/* Add destination */
addclosure(zz->sa_bitset, nf, ar->ar_arrow);
}
}
/* Now look up all the arrow states */
for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
zz = &xx_state[istate].ss_arc[jarc];
for (jstate = 0; jstate < xx_nstates; jstate++) {
if (samebitset(zz->sa_bitset,
xx_state[jstate].ss_ss, nbits)) {
zz->sa_arrow = jstate;
goto done;
}
}
size = sizeof(ss_state) * (xx_nstates + 1);
xx_state = (ss_state *)PyObject_REALLOC(xx_state,
size);
if (xx_state == NULL)
Py_FatalError("out of mem");
zz->sa_arrow = xx_nstates;
yy = &xx_state[xx_nstates++];
yy->ss_ss = zz->sa_bitset;
yy->ss_narcs = 0;
yy->ss_arc = NULL;
yy->ss_deleted = 0;
yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
done: ;
}
}
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"before minimizing");
simplify(xx_nstates, xx_state);
if (Py_DebugFlag)
printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
"after minimizing");
convert(d, xx_nstates, xx_state);
for (int i = 0; i < xx_nstates; i++) {
for (int j = 0; j < xx_state[i].ss_narcs; j++)
delbitset(xx_state[i].ss_arc[j].sa_bitset);
PyObject_FREE(xx_state[i].ss_arc);
}
PyObject_FREE(xx_state);
}
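/* Editor's note: makedfa is the textbook subset construction.  Each
   ss_state holds a bitset of NFA states; starting from the closure of
   nf_start, every distinct closure reachable on a non-EMPTY label
   becomes a new DFA state, and a DFA state is accepting (ss_finish)
   iff its set contains nf_finish. */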
static void
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
labellist *ll, const char *msg)
{
int i, ibit, iarc;
ss_state *yy;
ss_arc *zz;
printf("Subset DFA %s\n", msg);
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
printf(" Subset %d", i);
if (yy->ss_finish)
printf(" (finish)");
printf(" { ");
for (ibit = 0; ibit < nbits; ibit++) {
if (testbit(yy->ss_ss, ibit))
printf("%d ", ibit);
}
printf("}\n");
for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
zz = &yy->ss_arc[iarc];
printf(" Arc to state %d, label %s\n",
zz->sa_arrow,
PyGrammar_LabelRepr(
&ll->ll_label[zz->sa_label]));
}
}
}
/* PART THREE -- SIMPLIFY DFA */
/* Simplify the DFA by repeatedly eliminating states that are
   equivalent to another one.  This is NOT Algorithm 3.3 from
   [Aho&Ullman 77].  It does not always find the minimal DFA,
   but it does usually make a much smaller one...  (For an example
   of sub-optimal behavior, try S: x a b+ | y a b+.)
*/
static int
samestate(ss_state *s1, ss_state *s2)
{
int i;
if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
return 0;
for (i = 0; i < s1->ss_narcs; i++) {
if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
return 0;
}
return 1;
}
static void
renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
{
int i, j;
if (Py_DebugFlag)
printf("Rename state %d to %d.\n", from, to);
for (i = 0; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < xx_state[i].ss_narcs; j++) {
if (xx_state[i].ss_arc[j].sa_arrow == from)
xx_state[i].ss_arc[j].sa_arrow = to;
}
}
}
static void
simplify(int xx_nstates, ss_state *xx_state)
{
int changes;
int i, j;
do {
changes = 0;
for (i = 1; i < xx_nstates; i++) {
if (xx_state[i].ss_deleted)
continue;
for (j = 0; j < i; j++) {
if (xx_state[j].ss_deleted)
continue;
if (samestate(&xx_state[i], &xx_state[j])) {
xx_state[i].ss_deleted++;
renamestates(xx_nstates, xx_state,
i, j);
changes++;
break;
}
}
}
} while (changes);
}
/* PART FOUR -- GENERATE PARSING TABLES */
/* Convert the DFA into a grammar that can be used by our parser */
static void
convert(dfa *d, int xx_nstates, ss_state *xx_state)
{
int i, j;
ss_state *yy;
ss_arc *zz;
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
yy->ss_rename = addstate(d);
}
for (i = 0; i < xx_nstates; i++) {
yy = &xx_state[i];
if (yy->ss_deleted)
continue;
for (j = 0; j < yy->ss_narcs; j++) {
zz = &yy->ss_arc[j];
addarc(d, yy->ss_rename,
xx_state[zz->sa_arrow].ss_rename,
zz->sa_label);
}
        /* An arc labelled 0 (EMPTY) back to itself marks an accepting state */
        if (yy->ss_finish)
            addarc(d, yy->ss_rename, yy->ss_rename, 0);
}
d->d_initial = 0;
}
/* PART FIVE -- GLUE IT ALL TOGETHER */
static grammar *
maketables(nfagrammar *gr)
{
int i;
nfa *nf;
dfa *d;
grammar *g;
if (gr->gr_nnfas == 0)
return NULL;
g = newgrammar(gr->gr_nfa[0]->nf_type);
/* XXX first rule must be start rule */
g->g_ll = gr->gr_ll;
for (i = 0; i < gr->gr_nnfas; i++) {
nf = gr->gr_nfa[i];
if (Py_DebugFlag) {
printf("Dump of NFA for '%s' ...\n", nf->nf_name);
dumpnfa(&gr->gr_ll, nf);
printf("Making DFA for '%s' ...\n", nf->nf_name);
}
d = adddfa(g, nf->nf_type, nf->nf_name);
makedfa(gr, gr->gr_nfa[i], d);
}
return g;
}
grammar *
pgen(node *n)
{
nfagrammar *gr;
grammar *g;
gr = metacompile(n);
g = maketables(gr);
translatelabels(g);
addfirstsets(g);
freenfagrammar(gr);
return g;
}
grammar *
Py_pgen(node *n)
{
return pgen(n);
}
/*
Description
-----------
Input is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and
() for grouping). This has already been parsed and turned into a parse
tree.
Each rule is considered as a regular expression in its own right.
It is turned into a Non-deterministic Finite Automaton (NFA), which
is then turned into a Deterministic Finite Automaton (DFA), which is then
optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3,
or similar compiler books (this technique is more often used for lexical
analyzers).
The DFA's are used by the parser as parsing tables in a special way
that's probably unique. Before they are usable, the FIRST sets of all
non-terminals are computed.
Reference
---------
[Aho&Ullman 77]
Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
(first edition)
*/
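/* Editor's addendum -- a small end-to-end illustration (hypothetical
   rule, not from Grammar/Grammar):

       expr: term ('+' term)*

   pgen() compiles the repetition into an NFA loop, determinizes it via
   the subset construction above, merges equivalent DFA states, and
   emits the result as the arcs_* / states_* / dfas initializers that
   printgrammar.c writes into graminit.c.  addfirstsets() then computes
   the FIRST set of each nonterminal so the parser can choose among
   alternatives with a single token of lookahead. */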

189
third_party/python/Parser/pgenmain.c vendored Normal file
View file

@ -0,0 +1,189 @@
/* Parser generator main program */
/* This expects a filename containing the grammar as argv[1] (UNIX)
or asks the console for such a file name (THINK C).
It writes its output on two files in the current directory:
- "graminit.c" gets the grammar as a bunch of initialized data
- "graminit.h" gets the grammar's non-terminals as #defines.
Error messages and status info during the generation process are
written to stdout, or sometimes to stderr. */
/* XXX TO DO:
- check for duplicate definitions of names (instead of fatal err)
*/
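/* Example invocation (editor's note): the CPython build runs this
   program roughly as

       pgen Grammar/Grammar Include/graminit.h Python/graminit.c

   regenerating the parsing tables whenever the grammar changes. */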
#define PGEN
#include "Python.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "parsetok.h"
#include "pgen.h"
int Py_DebugFlag;
int Py_VerboseFlag;
int Py_IgnoreEnvironmentFlag;
/* Forward */
grammar *getgrammar(const char *filename);
void Py_Exit(int) _Py_NO_RETURN;
void
Py_Exit(int sts)
{
exit(sts);
}
#ifdef WITH_THREAD
/* Needed by obmalloc.c */
int PyGILState_Check(void)
{ return 1; }
#endif
void _PyMem_DumpTraceback(int fd, const void *ptr)
{}
int
main(int argc, char **argv)
{
grammar *g;
FILE *fp;
char *filename, *graminit_h, *graminit_c;
if (argc != 4) {
fprintf(stderr,
"usage: %s grammar graminit.h graminit.c\n", argv[0]);
Py_Exit(2);
}
filename = argv[1];
graminit_h = argv[2];
graminit_c = argv[3];
g = getgrammar(filename);
fp = fopen(graminit_c, "w");
if (fp == NULL) {
perror(graminit_c);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_c);
printgrammar(g, fp);
fclose(fp);
fp = fopen(graminit_h, "w");
if (fp == NULL) {
perror(graminit_h);
Py_Exit(1);
}
if (Py_DebugFlag)
printf("Writing %s ...\n", graminit_h);
printnonterminals(g, fp);
fclose(fp);
freegrammar(g);
Py_Exit(0);
return 0; /* Make gcc -Wall happy */
}
grammar *
getgrammar(const char *filename)
{
FILE *fp;
node *n;
grammar *g0, *g;
perrdetail err;
fp = fopen(filename, "r");
if (fp == NULL) {
perror(filename);
Py_Exit(1);
}
g0 = meta_grammar();
n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
(char *)NULL, (char *)NULL, &err);
fclose(fp);
if (n == NULL) {
fprintf(stderr, "Parsing error %d, line %d.\n",
err.error, err.lineno);
if (err.text != NULL) {
size_t len;
int i;
fprintf(stderr, "%s", err.text);
len = strlen(err.text);
if (len == 0 || err.text[len-1] != '\n')
fprintf(stderr, "\n");
for (i = 0; i < err.offset; i++) {
if (err.text[i] == '\t')
putc('\t', stderr);
else
putc(' ', stderr);
}
fprintf(stderr, "^\n");
PyObject_FREE(err.text);
}
Py_Exit(1);
}
g = pgen(n);
PyNode_Free(n);
if (g == NULL) {
printf("Bad grammar.\n");
Py_Exit(1);
}
return g;
}
/* Can't happen in pgen */
PyObject*
PyErr_Occurred()
{
return 0;
}
void
Py_FatalError(const char *msg)
{
fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
Py_Exit(1);
}
/* No-nonsense my_readline() for tokenizer.c */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
size_t n = 1000;
char *p = (char *)PyMem_MALLOC(n);
char *q;
if (p == NULL)
return NULL;
fprintf(stderr, "%s", prompt);
q = fgets(p, n, sys_stdin);
if (q == NULL) {
*p = '\0';
return p;
}
    n = strlen(p);
    /* If the line hit EOF or was truncated without a newline, force one
       by overwriting the last character -- crude, but sufficient for
       feeding a grammar file to the tokenizer. */
    if (n > 0 && p[n-1] != '\n')
        p[n-1] = '\n';
    return (char *)PyMem_REALLOC(p, n+1);
}
/* No-nonsense fgets */
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
return fgets(buf, n, stream);
}
#include <stdarg.h>
void
PySys_WriteStderr(const char *format, ...)
{
va_list va;
va_start(va, format);
vfprintf(stderr, format, va);
va_end(va);
}

120
third_party/python/Parser/printgrammar.c vendored Normal file
View file

@ -0,0 +1,120 @@
/* Print a bunch of C initializers that represent a grammar */
#define PGEN
#include "pgenheaders.h"
#include "grammar.h"
/* Forward */
static void printarcs(int, dfa *, FILE *);
static void printstates(grammar *, FILE *);
static void printdfas(grammar *, FILE *);
static void printlabels(grammar *, FILE *);
void
printgrammar(grammar *g, FILE *fp)
{
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
fprintf(fp, "#include \"pgenheaders.h\"\n");
fprintf(fp, "#include \"grammar.h\"\n");
fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
printdfas(g, fp);
printlabels(g, fp);
fprintf(fp, "grammar _PyParser_Grammar = {\n");
fprintf(fp, " %d,\n", g->g_ndfas);
fprintf(fp, " dfas,\n");
fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels);
fprintf(fp, " %d\n", g->g_start);
fprintf(fp, "};\n");
}
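/* Editor's note: the emitted graminit.c has roughly this shape (the
   values below are hypothetical; the real numbers depend on the
   grammar):

       static arc arcs_0_0[3] = { {2, 1}, {3, 1}, {4, 2} };
       static state states_0[3] = { {3, arcs_0_0}, ... };
       static dfa dfas[N] = { {256, "single_input", 0, 3, states_0,
                               "\004\060\002\000..."}, ... };
       static label labels[M] = { {0, "EMPTY"}, ... };
       grammar _PyParser_Grammar = { N, dfas, {M, labels}, 256 };
*/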
void
printnonterminals(grammar *g, FILE *fp)
{
dfa *d;
int i;
fprintf(fp, "/* Generated by Parser/pgen */\n\n");
d = g->g_dfa;
for (i = g->g_ndfas; --i >= 0; d++)
fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
}
static void
printarcs(int i, dfa *d, FILE *fp)
{
arc *a;
state *s;
int j, k;
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++) {
fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
i, j, s->s_narcs);
a = s->s_arc;
for (k = 0; k < s->s_narcs; k++, a++)
fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow);
fprintf(fp, "};\n");
}
}
static void
printstates(grammar *g, FILE *fp)
{
state *s;
dfa *d;
int i, j;
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
printarcs(i, d, fp);
fprintf(fp, "static state states_%d[%d] = {\n",
i, d->d_nstates);
s = d->d_state;
for (j = 0; j < d->d_nstates; j++, s++)
fprintf(fp, " {%d, arcs_%d_%d},\n",
s->s_narcs, i, j);
fprintf(fp, "};\n");
}
}
static void
printdfas(grammar *g, FILE *fp)
{
dfa *d;
int i, j, n;
printstates(g, fp);
fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
d = g->g_dfa;
for (i = 0; i < g->g_ndfas; i++, d++) {
fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n",
d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
fprintf(fp, " \"");
n = NBYTES(g->g_ll.ll_nlabels);
for (j = 0; j < n; j++)
fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
fprintf(fp, "\"},\n");
}
fprintf(fp, "};\n");
}
static void
printlabels(grammar *g, FILE *fp)
{
label *l;
int i;
fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
l = g->g_ll.ll_label;
for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
if (l->lb_str == NULL)
fprintf(fp, " {%d, 0},\n", l->lb_type);
else
fprintf(fp, " {%d, \"%s\"},\n",
l->lb_type, l->lb_str);
}
fprintf(fp, "};\n");
}

2000
third_party/python/Parser/tokenizer.c vendored Normal file

File diff suppressed because it is too large

89
third_party/python/Parser/tokenizer.h vendored Normal file
View file

@ -0,0 +1,89 @@
#ifndef Py_TOKENIZER_H
#define Py_TOKENIZER_H
#ifdef __cplusplus
extern "C" {
#endif
#include "object.h"
/* Tokenizer interface */
#include "token.h" /* For token types */
#define MAXINDENT 100 /* Max indentation level */
enum decoding_state {
STATE_INIT,
STATE_RAW,
STATE_NORMAL /* have a codec associated with input */
};
/* Tokenizer state */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
char *end; /* End of input buffer if buf != NULL */
char *start; /* Start of current token if not NULL */
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
/* NB If done != E_OK, cur must be == inp!!! */
FILE *fp; /* Rest of input; NULL if tokenizing a string */
int tabsize; /* Tab spacing */
int indent; /* Current indentation index */
int indstack[MAXINDENT]; /* Stack of indents */
int atbol; /* Nonzero if at begin of new line */
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
const char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
#ifndef PGEN
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject *filename;
#endif
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */
int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */
enum decoding_state decoding_state;
int decoding_erred; /* whether erred in decoding */
int read_coding_spec; /* whether 'coding:...' has been read */
char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
#endif
const char* enc; /* Encoding for the current str. */
const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */
/* async/await related fields; can be removed in 3.7 when async and await
become normal keywords. */
int async_def; /* =1 if tokens are inside an 'async def' body. */
int async_def_indent; /* Indentation level of the outermost 'async def'. */
int async_def_nl; /* =1 if the outermost 'async def' had at least one
NEWLINE token after it. */
};
extern struct tok_state *PyTokenizer_FromString(const char *, int);
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *);
extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset);
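/* Editor's sketch (not part of the original header): the tokenizer is
   driven exactly the way parsetok.c drives it --

       struct tok_state *tok = PyTokenizer_FromString("x = 1\n", 1);
       char *a, *b;
       int type;
       while ((type = PyTokenizer_Get(tok, &a, &b)) != ENDMARKER) {
           if (type == ERRORTOKEN)
               break;              (tok->done holds the E_* code)
           ... token text is the half-open range [a, b) ...
       }
       PyTokenizer_Free(tok);
*/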
#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKENIZER_H */

2
third_party/python/Parser/tokenizer_pgen.c vendored Normal file
View file

@ -0,0 +1,2 @@
#define PGEN
#include "tokenizer.c"