json: port fixes from mjs to python

This commit is contained in:
ochafik 2024-03-11 02:10:48 +00:00
parent 4e2d06c741
commit c8254e5f8a

View file

@@ -29,6 +29,8 @@ GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
 GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]')
 GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]'}
+NON_LITERAL_SET = set('|.()[]{}*+?')
+ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('{*+?')
 class SchemaConverter:
     def __init__(self, *, prop_order, allow_fetch, dotall):
@@ -245,19 +247,18 @@ class SchemaConverter:
         )
     else:
         lit = ''
-        while i < length and pattern[i] not in ('.', '(', ')', '|', '[', '{', '*', '+', '?') \
-                and not (i < length - 1 and pattern[i+1] in ('{', '*', '+', '?')):
-            c = pattern[i]
-            if c == '\\' and i < length - 1:
+        while i < length and pattern[i] not in NON_LITERAL_SET \
+                and not (i < length - 1 and pattern[i+1] in ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS):
+            if pattern[i] == '\\' and i < length - 1:
                 i += 1
-                if c in ('.', '[', ']', '{', '}', '(', ')', '|', '*', '+', '?'):
+                if pattern[i] in NON_LITERAL_SET:
                     # Escapes in regular expressions that aren't escaped in GBNF literals
-                    lit += c
+                    lit += pattern[i]
                 else:
-                    lit += f'\\{c}'
+                    lit += f'\\{pattern[i]}'
                 i += 1
             else:
-                lit += c
+                lit += pattern[i]
                 i += 1
         if lit:
             seq.append((f'"{lit}"', True))