json
: support integer minimum, maximum, exclusiveMinimum, exclusiveMaximum (#7797)
* json: support minimum for positive integer values * json: fix min 0 * json: min + max integer constraints * json: handle negative min / max integer bounds * json: fix missing paren min/max bug * json: proper paren fix * json: integration test for schemas * json: fix bounds tests * Update json-schema-to-grammar.cpp * json: fix negative max * json: fix negative min (w/ more than 1 digit) * Update test-grammar-integration.cpp * json: nit: move string rules together * json: port min/max integer support to Python & JS * nit: move + rename _build_min_max_int * fix min in [1, 9] * Update test-grammar-integration.cpp * add C++11-compatible replacement for std::string_view * add min/max constrained int field to pydantic json schema example * fix merge * json: add integration tests for min/max bounds * reshuffle/merge min/max integ test cases * nits / cleanups * defensive code against string out of bounds (apparently different behaviour of libstdc++ vs. clang's libc++, can't read final NULL char w/ former)
This commit is contained in:
parent
dd047b476c
commit
84631fe150
6 changed files with 1150 additions and 3 deletions
|
@ -4,7 +4,7 @@ import itertools
|
|||
import json
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Set, Tuple, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
|
||||
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
||||
|
@ -23,6 +23,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
|||
result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None)
|
||||
return f'({result})?' if min_items == 0 else result
|
||||
|
||||
def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True):
|
||||
has_min = min_value != None
|
||||
has_max = max_value != None
|
||||
|
||||
def digit_range(from_char: str, to_char: str):
|
||||
out.append("[")
|
||||
if from_char == to_char:
|
||||
out.append(from_char)
|
||||
else:
|
||||
out.append(from_char)
|
||||
out.append("-")
|
||||
out.append(to_char)
|
||||
out.append("]")
|
||||
|
||||
def more_digits(min_digits: int, max_digits: int):
|
||||
out.append("[0-9]")
|
||||
if min_digits == max_digits and min_digits == 1:
|
||||
return
|
||||
out.append("{")
|
||||
out.append(str(min_digits))
|
||||
if max_digits != min_digits:
|
||||
out.append(",")
|
||||
if max_digits != sys.maxsize:
|
||||
out.append(str(max_digits))
|
||||
out.append("}")
|
||||
|
||||
def uniform_range(from_str: str, to_str: str):
|
||||
i = 0
|
||||
while i < len(from_str) and from_str[i] == to_str[i]:
|
||||
i += 1
|
||||
if i > 0:
|
||||
out.append("\"")
|
||||
out.append(from_str[:i])
|
||||
out.append("\"")
|
||||
if i < len(from_str):
|
||||
if i > 0:
|
||||
out.append(" ")
|
||||
sub_len = len(from_str) - i - 1
|
||||
if sub_len > 0:
|
||||
from_sub = from_str[i+1:]
|
||||
to_sub = to_str[i+1:]
|
||||
sub_zeros = "0" * sub_len
|
||||
sub_nines = "9" * sub_len
|
||||
|
||||
to_reached = False
|
||||
out.append("(")
|
||||
if from_sub == sub_zeros:
|
||||
digit_range(from_str[i], chr(ord(to_str[i]) - 1))
|
||||
out.append(" ")
|
||||
more_digits(sub_len, sub_len)
|
||||
else:
|
||||
out.append("[")
|
||||
out.append(from_str[i])
|
||||
out.append("] ")
|
||||
out.append("(")
|
||||
uniform_range(from_sub, sub_nines)
|
||||
out.append(")")
|
||||
if ord(from_str[i]) < ord(to_str[i]) - 1:
|
||||
out.append(" | ")
|
||||
if to_sub == sub_nines:
|
||||
digit_range(chr(ord(from_str[i]) + 1), to_str[i])
|
||||
to_reached = True
|
||||
else:
|
||||
digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1))
|
||||
out.append(" ")
|
||||
more_digits(sub_len, sub_len)
|
||||
if not to_reached:
|
||||
out.append(" | ")
|
||||
digit_range(to_str[i], to_str[i])
|
||||
out.append(" ")
|
||||
uniform_range(sub_zeros, to_sub)
|
||||
out.append(")")
|
||||
else:
|
||||
out.append("[")
|
||||
out.append(from_str[i])
|
||||
out.append("-")
|
||||
out.append(to_str[i])
|
||||
out.append("]")
|
||||
|
||||
if has_min and has_max:
|
||||
if min_value < 0 and max_value < 0:
|
||||
out.append("\"-\" (")
|
||||
_generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True)
|
||||
out.append(")")
|
||||
return
|
||||
|
||||
if min_value < 0:
|
||||
out.append("\"-\" (")
|
||||
_generate_min_max_int(0, -min_value, out, decimals_left, top_level=True)
|
||||
out.append(") | ")
|
||||
min_value = 0
|
||||
|
||||
min_s = str(min_value)
|
||||
max_s = str(max_value)
|
||||
min_digits = len(min_s)
|
||||
max_digits = len(max_s)
|
||||
|
||||
for digits in range(min_digits, max_digits):
|
||||
uniform_range(min_s, "9" * digits)
|
||||
min_s = "1" + "0" * digits
|
||||
out.append(" | ")
|
||||
uniform_range(min_s, max_s)
|
||||
return
|
||||
|
||||
less_decimals = max(decimals_left - 1, 1)
|
||||
|
||||
if has_min:
|
||||
if min_value < 0:
|
||||
out.append("\"-\" (")
|
||||
_generate_min_max_int(None, -min_value, out, decimals_left, top_level=False)
|
||||
out.append(") | [0] | [1-9] ")
|
||||
more_digits(0, decimals_left - 1)
|
||||
elif min_value == 0:
|
||||
if top_level:
|
||||
out.append("[0] | [1-9] ")
|
||||
more_digits(0, less_decimals)
|
||||
else:
|
||||
more_digits(1, decimals_left)
|
||||
elif min_value <= 9:
|
||||
c = str(min_value)
|
||||
range_start = '1' if top_level else '0'
|
||||
if c > range_start:
|
||||
digit_range(range_start, chr(ord(c) - 1))
|
||||
out.append(" ")
|
||||
more_digits(1, less_decimals)
|
||||
out.append(" | ")
|
||||
digit_range(c, "9")
|
||||
out.append(" ")
|
||||
more_digits(0, less_decimals)
|
||||
else:
|
||||
min_s = str(min_value)
|
||||
length = len(min_s)
|
||||
c = min_s[0]
|
||||
|
||||
if c > "1":
|
||||
digit_range("1" if top_level else "0", chr(ord(c) - 1))
|
||||
out.append(" ")
|
||||
more_digits(length, less_decimals)
|
||||
out.append(" | ")
|
||||
digit_range(c, c)
|
||||
out.append(" (")
|
||||
_generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False)
|
||||
out.append(")")
|
||||
if c < "9":
|
||||
out.append(" | ")
|
||||
digit_range(chr(ord(c) + 1), "9")
|
||||
out.append(" ")
|
||||
more_digits(length - 1, less_decimals)
|
||||
return
|
||||
|
||||
if has_max:
|
||||
if max_value >= 0:
|
||||
if top_level:
|
||||
out.append("\"-\" [1-9] ")
|
||||
more_digits(0, less_decimals)
|
||||
out.append(" | ")
|
||||
_generate_min_max_int(0, max_value, out, decimals_left, top_level=True)
|
||||
else:
|
||||
out.append("\"-\" (")
|
||||
_generate_min_max_int(-max_value, None, out, decimals_left, top_level=False)
|
||||
out.append(")")
|
||||
return
|
||||
|
||||
raise RuntimeError("At least one of min_value or max_value must be set")
|
||||
|
||||
class BuiltinRule:
|
||||
def __init__(self, content: str, deps: list = None):
|
||||
|
@ -432,6 +596,24 @@ class SchemaConverter:
|
|||
|
||||
return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')
|
||||
|
||||
elif schema_type in (None, 'integer') and \
|
||||
('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema):
|
||||
min_value = None
|
||||
max_value = None
|
||||
if 'minimum' in schema:
|
||||
min_value = schema['minimum']
|
||||
elif 'exclusiveMinimum' in schema:
|
||||
min_value = schema['exclusiveMinimum'] + 1
|
||||
if 'maximum' in schema:
|
||||
max_value = schema['maximum']
|
||||
elif 'exclusiveMaximum' in schema:
|
||||
max_value = schema['exclusiveMaximum'] - 1
|
||||
|
||||
out = ["("]
|
||||
_generate_min_max_int(min_value, max_value, out)
|
||||
out.append(") space")
|
||||
return self._add_rule(rule_name, ''.join(out))
|
||||
|
||||
elif (schema_type == 'object') or (len(schema) == 0):
|
||||
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue