mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
281
third_party/python/Lib/lib2to3/btm_utils.py
vendored
Normal file
281
third_party/python/Lib/lib2to3/btm_utils.py
vendored
Normal file
|
@ -0,0 +1,281 @@
|
|||
"Utility functions used by the btm_matcher module"
|
||||
|
||||
from . import pytree
|
||||
from .pgen2 import grammar, token
|
||||
from .pygram import pattern_symbols, python_symbols
|
||||
|
||||
syms = pattern_symbols
|
||||
pysyms = python_symbols
|
||||
tokens = grammar.opmap
|
||||
token_labels = token
|
||||
|
||||
TYPE_ANY = -1
|
||||
TYPE_ALTERNATIVES = -2
|
||||
TYPE_GROUP = -3
|
||||
|
||||
class MinNode(object):
|
||||
"""This class serves as an intermediate representation of the
|
||||
pattern tree during the conversion to sets of leaf-to-root
|
||||
subpatterns"""
|
||||
|
||||
def __init__(self, type=None, name=None):
|
||||
self.type = type
|
||||
self.name = name
|
||||
self.children = []
|
||||
self.leaf = False
|
||||
self.parent = None
|
||||
self.alternatives = []
|
||||
self.group = []
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.type) + ' ' + str(self.name)
|
||||
|
||||
def leaf_to_root(self):
|
||||
"""Internal method. Returns a characteristic path of the
|
||||
pattern tree. This method must be run for all leaves until the
|
||||
linear subpatterns are merged into a single"""
|
||||
node = self
|
||||
subp = []
|
||||
while node:
|
||||
if node.type == TYPE_ALTERNATIVES:
|
||||
node.alternatives.append(subp)
|
||||
if len(node.alternatives) == len(node.children):
|
||||
#last alternative
|
||||
subp = [tuple(node.alternatives)]
|
||||
node.alternatives = []
|
||||
node = node.parent
|
||||
continue
|
||||
else:
|
||||
node = node.parent
|
||||
subp = None
|
||||
break
|
||||
|
||||
if node.type == TYPE_GROUP:
|
||||
node.group.append(subp)
|
||||
#probably should check the number of leaves
|
||||
if len(node.group) == len(node.children):
|
||||
subp = get_characteristic_subpattern(node.group)
|
||||
node.group = []
|
||||
node = node.parent
|
||||
continue
|
||||
else:
|
||||
node = node.parent
|
||||
subp = None
|
||||
break
|
||||
|
||||
if node.type == token_labels.NAME and node.name:
|
||||
#in case of type=name, use the name instead
|
||||
subp.append(node.name)
|
||||
else:
|
||||
subp.append(node.type)
|
||||
|
||||
node = node.parent
|
||||
return subp
|
||||
|
||||
def get_linear_subpattern(self):
|
||||
"""Drives the leaf_to_root method. The reason that
|
||||
leaf_to_root must be run multiple times is because we need to
|
||||
reject 'group' matches; for example the alternative form
|
||||
(a | b c) creates a group [b c] that needs to be matched. Since
|
||||
matching multiple linear patterns overcomes the automaton's
|
||||
capabilities, leaf_to_root merges each group into a single
|
||||
choice based on 'characteristic'ity,
|
||||
|
||||
i.e. (a|b c) -> (a|b) if b more characteristic than c
|
||||
|
||||
Returns: The most 'characteristic'(as defined by
|
||||
get_characteristic_subpattern) path for the compiled pattern
|
||||
tree.
|
||||
"""
|
||||
|
||||
for l in self.leaves():
|
||||
subp = l.leaf_to_root()
|
||||
if subp:
|
||||
return subp
|
||||
|
||||
def leaves(self):
|
||||
"Generator that returns the leaves of the tree"
|
||||
for child in self.children:
|
||||
yield from child.leaves()
|
||||
if not self.children:
|
||||
yield self
|
||||
|
||||
def reduce_tree(node, parent=None):
|
||||
"""
|
||||
Internal function. Reduces a compiled pattern tree to an
|
||||
intermediate representation suitable for feeding the
|
||||
automaton. This also trims off any optional pattern elements(like
|
||||
[a], a*).
|
||||
"""
|
||||
|
||||
new_node = None
|
||||
#switch on the node type
|
||||
if node.type == syms.Matcher:
|
||||
#skip
|
||||
node = node.children[0]
|
||||
|
||||
if node.type == syms.Alternatives :
|
||||
#2 cases
|
||||
if len(node.children) <= 2:
|
||||
#just a single 'Alternative', skip this node
|
||||
new_node = reduce_tree(node.children[0], parent)
|
||||
else:
|
||||
#real alternatives
|
||||
new_node = MinNode(type=TYPE_ALTERNATIVES)
|
||||
#skip odd children('|' tokens)
|
||||
for child in node.children:
|
||||
if node.children.index(child)%2:
|
||||
continue
|
||||
reduced = reduce_tree(child, new_node)
|
||||
if reduced is not None:
|
||||
new_node.children.append(reduced)
|
||||
elif node.type == syms.Alternative:
|
||||
if len(node.children) > 1:
|
||||
|
||||
new_node = MinNode(type=TYPE_GROUP)
|
||||
for child in node.children:
|
||||
reduced = reduce_tree(child, new_node)
|
||||
if reduced:
|
||||
new_node.children.append(reduced)
|
||||
if not new_node.children:
|
||||
# delete the group if all of the children were reduced to None
|
||||
new_node = None
|
||||
|
||||
else:
|
||||
new_node = reduce_tree(node.children[0], parent)
|
||||
|
||||
elif node.type == syms.Unit:
|
||||
if (isinstance(node.children[0], pytree.Leaf) and
|
||||
node.children[0].value == '('):
|
||||
#skip parentheses
|
||||
return reduce_tree(node.children[1], parent)
|
||||
if ((isinstance(node.children[0], pytree.Leaf) and
|
||||
node.children[0].value == '[')
|
||||
or
|
||||
(len(node.children)>1 and
|
||||
hasattr(node.children[1], "value") and
|
||||
node.children[1].value == '[')):
|
||||
#skip whole unit if its optional
|
||||
return None
|
||||
|
||||
leaf = True
|
||||
details_node = None
|
||||
alternatives_node = None
|
||||
has_repeater = False
|
||||
repeater_node = None
|
||||
has_variable_name = False
|
||||
|
||||
for child in node.children:
|
||||
if child.type == syms.Details:
|
||||
leaf = False
|
||||
details_node = child
|
||||
elif child.type == syms.Repeater:
|
||||
has_repeater = True
|
||||
repeater_node = child
|
||||
elif child.type == syms.Alternatives:
|
||||
alternatives_node = child
|
||||
if hasattr(child, 'value') and child.value == '=': # variable name
|
||||
has_variable_name = True
|
||||
|
||||
#skip variable name
|
||||
if has_variable_name:
|
||||
#skip variable name, '='
|
||||
name_leaf = node.children[2]
|
||||
if hasattr(name_leaf, 'value') and name_leaf.value == '(':
|
||||
# skip parenthesis
|
||||
name_leaf = node.children[3]
|
||||
else:
|
||||
name_leaf = node.children[0]
|
||||
|
||||
#set node type
|
||||
if name_leaf.type == token_labels.NAME:
|
||||
#(python) non-name or wildcard
|
||||
if name_leaf.value == 'any':
|
||||
new_node = MinNode(type=TYPE_ANY)
|
||||
else:
|
||||
if hasattr(token_labels, name_leaf.value):
|
||||
new_node = MinNode(type=getattr(token_labels, name_leaf.value))
|
||||
else:
|
||||
new_node = MinNode(type=getattr(pysyms, name_leaf.value))
|
||||
|
||||
elif name_leaf.type == token_labels.STRING:
|
||||
#(python) name or character; remove the apostrophes from
|
||||
#the string value
|
||||
name = name_leaf.value.strip("'")
|
||||
if name in tokens:
|
||||
new_node = MinNode(type=tokens[name])
|
||||
else:
|
||||
new_node = MinNode(type=token_labels.NAME, name=name)
|
||||
elif name_leaf.type == syms.Alternatives:
|
||||
new_node = reduce_tree(alternatives_node, parent)
|
||||
|
||||
#handle repeaters
|
||||
if has_repeater:
|
||||
if repeater_node.children[0].value == '*':
|
||||
#reduce to None
|
||||
new_node = None
|
||||
elif repeater_node.children[0].value == '+':
|
||||
#reduce to a single occurrence i.e. do nothing
|
||||
pass
|
||||
else:
|
||||
#TODO: handle {min, max} repeaters
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
||||
#add children
|
||||
if details_node and new_node is not None:
|
||||
for child in details_node.children[1:-1]:
|
||||
#skip '<', '>' markers
|
||||
reduced = reduce_tree(child, new_node)
|
||||
if reduced is not None:
|
||||
new_node.children.append(reduced)
|
||||
if new_node:
|
||||
new_node.parent = parent
|
||||
return new_node
|
||||
|
||||
|
||||
def get_characteristic_subpattern(subpatterns):
|
||||
"""Picks the most characteristic from a list of linear patterns
|
||||
Current order used is:
|
||||
names > common_names > common_chars
|
||||
"""
|
||||
if not isinstance(subpatterns, list):
|
||||
return subpatterns
|
||||
if len(subpatterns)==1:
|
||||
return subpatterns[0]
|
||||
|
||||
# first pick out the ones containing variable names
|
||||
subpatterns_with_names = []
|
||||
subpatterns_with_common_names = []
|
||||
common_names = ['in', 'for', 'if' , 'not', 'None']
|
||||
subpatterns_with_common_chars = []
|
||||
common_chars = "[]().,:"
|
||||
for subpattern in subpatterns:
|
||||
if any(rec_test(subpattern, lambda x: type(x) is str)):
|
||||
if any(rec_test(subpattern,
|
||||
lambda x: isinstance(x, str) and x in common_chars)):
|
||||
subpatterns_with_common_chars.append(subpattern)
|
||||
elif any(rec_test(subpattern,
|
||||
lambda x: isinstance(x, str) and x in common_names)):
|
||||
subpatterns_with_common_names.append(subpattern)
|
||||
|
||||
else:
|
||||
subpatterns_with_names.append(subpattern)
|
||||
|
||||
if subpatterns_with_names:
|
||||
subpatterns = subpatterns_with_names
|
||||
elif subpatterns_with_common_names:
|
||||
subpatterns = subpatterns_with_common_names
|
||||
elif subpatterns_with_common_chars:
|
||||
subpatterns = subpatterns_with_common_chars
|
||||
# of the remaining subpatterns pick out the longest one
|
||||
return max(subpatterns, key=len)
|
||||
|
||||
def rec_test(sequence, test_func):
|
||||
"""Tests test_func on all items of sequence and items of included
|
||||
sub-iterables"""
|
||||
for x in sequence:
|
||||
if isinstance(x, (list, tuple)):
|
||||
yield from rec_test(x, test_func)
|
||||
else:
|
||||
yield test_func(x)
|
Loading…
Add table
Add a link
Reference in a new issue