mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-22 21:32:31 +00:00
python-3.6.zip added from Github
README.cosmo contains the necessary links.
This commit is contained in:
parent
75fc601ff5
commit
0c4c56ff39
4219 changed files with 1968626 additions and 0 deletions
312
third_party/python/Lib/idlelib/hyperparser.py
vendored
Normal file
312
third_party/python/Lib/idlelib/hyperparser.py
vendored
Normal file
|
@ -0,0 +1,312 @@
|
|||
"""Provide advanced parsing abilities for ParenMatch and other extensions.
|
||||
|
||||
HyperParser uses PyParser. PyParser mostly gives information on the
|
||||
proper indentation of code. HyperParser gives additional information on
|
||||
the structure of code.
|
||||
"""
|
||||
from keyword import iskeyword
|
||||
import string
|
||||
|
||||
from idlelib import pyparse
|
||||
|
||||
# all ASCII chars that may be in an identifier
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# all ASCII chars that may be the first char of an identifier
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
# lookup table for whether 7-bit ASCII chars are valid as the first
# char in a Python identifier
_IS_ASCII_ID_FIRST_CHAR = \
    [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]


class HyperParser:
    """Analyze the structure of the statement surrounding a text index.

    On construction the statement containing the given index is fetched
    from the edit window's text widget and handed to pyparse.Parser.
    The resulting state is kept in these attributes:

    rawtext        -- the analyzed source text, ending at stopatindex
    stopatindex    -- text index ("<line>.end") where rawtext ends
    bracketing     -- result of Parser.get_last_stmt_bracketing(); used
                      here as a sequence of (position, level) pairs
    isopener       -- for each bracketing entry, whether its level is
                      higher than that of the previous entry
    indexinrawtext -- offset in rawtext of the index being examined
    indexbracket   -- bracketing entry to which that index belongs
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide the attributes text, indentwidth, tabwidth,
        context_use_ps1 and num_context_lines, and the method
        _build_char_in_string_func().  index is an index of editwin.text.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        # The integer part of the "<line>.<column>" index is the line.
        lno = int(float(text.index(index)))

        if not editwin.context_use_ps1:
            # Try increasingly large contexts until the parser finds a
            # good place to start, or the start of the text is reached.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_code(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console",
            # or at the beginning of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_code(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.code[:-2]
        # Parser.code apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i > 0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raise ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so offsets are measured from there.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry located exactly at the index is also taken, unless
        # it is an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] ==
                self.indexinrawtext and
                not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Normal code is anything not opened by a comment or a quote.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        before = self.indexbracket
        # Walk backwards to the nearest enclosing opener of a wanted kind.
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that opener's level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Offsets in rawtext are converted to text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # the set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.

        The search never goes back past limit.  Keywords are not
        considered identifiers here, except for True, False and None.

        Note: the str parameter shadows the builtin inside this method;
        the name is kept so that existing callers are unaffected.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters. This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # The 'a' prefix makes the test accept characters that are
            # valid inside an identifier but not as its first character.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here. If it isn't a valid
            # identifier, don't eat anything. At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"

    # All chars that may appear in a string literal prefix, including
    # the f/F of formatted string literals.
    _string_prefix_chars = "rRbBuUfF"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raise ValueError if the index is not inside normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if (pos > brck_limit and
                        rawtext[pos-1] in self._whitespace_chars):
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix, so that it is part of
                        # the returned expression.
                        while (pos > 0 and
                               rawtext[pos - 1] in self._string_prefix_chars):
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Executed as a script: run this module's unit tests verbosely.
    import unittest
    unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)
|
Loading…
Add table
Add a link
Reference in a new issue