json: support integer minimum, maximum, exclusiveMinimum, exclusiveMaximum (#7797)
* json: support minimum for positive integer values
* json: fix min 0
* json: min + max integer constraints
* json: handle negative min / max integer bounds
* json: fix missing paren min/max bug
* json: proper paren fix
* json: integration test for schemas
* json: fix bounds tests
* Update json-schema-to-grammar.cpp
* json: fix negative max
* json: fix negative min (w/ more than 1 digit)
* Update test-grammar-integration.cpp
* json: nit: move string rules together
* json: port min/max integer support to Python & JS
* nit: move + rename _build_min_max_int
* fix min in [1, 9]
* Update test-grammar-integration.cpp
* add C++11-compatible replacement for std::string_view
* add min/max constrained int field to pydantic json schema example
* fix merge
* json: add integration tests for min/max bounds
* reshuffle/merge min/max integ test cases
* nits / cleanups
* defensive code against string out of bounds (apparently different behaviour of libstdc++ vs. clang's libc++, can't read final NULL char w/ former)
This commit is contained in:
parent
dd047b476c
commit
84631fe150
6 changed files with 1150 additions and 3 deletions
|
@ -53,6 +53,7 @@ if __name__ == '__main__':
|
|||
question: str
|
||||
concise_answer: str
|
||||
justification: str
|
||||
stars: Annotated[int, Field(ge=1, le=5)]
|
||||
|
||||
class PyramidalSummary(BaseModel):
|
||||
title: str
|
||||
|
|
|
@ -4,7 +4,7 @@ import itertools
|
|||
import json
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Dict, List, Set, Tuple, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
|
||||
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
||||
|
@ -23,6 +23,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
|||
result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None)
|
||||
return f'({result})?' if min_items == 0 else result
|
||||
|
||||
def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True):
    """Append grammar-rule fragments matching integers within the given bounds.

    The fragments (character classes such as ``[1-9]``, quoted literals such
    as ``"-"``, ``{m,n}`` repetitions, parentheses and ``|`` alternations) are
    appended to ``out``; callers are expected to ``''.join(out)`` them.

    Args:
        min_value: Inclusive lower bound, or ``None`` for no lower bound.
        max_value: Inclusive upper bound, or ``None`` for no upper bound.
        out: List of string fragments, mutated in place.
        decimals_left: Budget used for the ``{m,n}`` digit repetitions emitted
            when one side is unbounded.
        top_level: When true, the first digit of a multi-digit alternative
            starts at ``1`` (no leading zero); nested calls that describe the
            remaining digits of a number pass ``False`` so ``0`` is allowed.

    Raises:
        RuntimeError: If both ``min_value`` and ``max_value`` are ``None``.
    """
    has_min = min_value is not None
    has_max = max_value is not None

    def digit_range(from_char: str, to_char: str):
        # Single-digit character class: "[d]" or "[a-b]".
        if from_char == to_char:
            out.extend(("[", from_char, "]"))
        else:
            out.extend(("[", from_char, "-", to_char, "]"))

    def more_digits(min_digits: int, max_digits: int):
        # "[0-9]" plus a "{min,max}" repetition; the repetition is omitted
        # when exactly one digit is requested, and the upper bound is left
        # open when max_digits is sys.maxsize.
        out.append("[0-9]")
        if min_digits == max_digits and min_digits == 1:
            return
        out.extend(("{", str(min_digits)))
        if max_digits != min_digits:
            out.append(",")
            if max_digits != sys.maxsize:
                out.append(str(max_digits))
        out.append("}")

    def uniform_range(from_str: str, to_str: str):
        # Emits a pattern for the digit strings between from_str and to_str;
        # every call site passes two strings of the same length.
        i = 0
        while i < len(from_str) and from_str[i] == to_str[i]:
            i += 1
        if i > 0:
            # Shared prefix is emitted once as a quoted literal.
            out.extend(("\"", from_str[:i], "\""))
        if i >= len(from_str):
            return
        if i > 0:
            out.append(" ")

        sub_len = len(from_str) - i - 1
        if sub_len <= 0:
            # Only the last digit differs: a plain character class suffices.
            out.extend(("[", from_str[i], "-", to_str[i], "]"))
            return

        from_sub = from_str[i + 1:]
        to_sub = to_str[i + 1:]
        sub_zeros = "0" * sub_len
        sub_nines = "9" * sub_len

        to_reached = False
        out.append("(")
        if from_sub == sub_zeros:
            # Lower bound is "d000...": every leading digit below the upper
            # bound's leading digit can be followed by any digits.
            digit_range(from_str[i], chr(ord(to_str[i]) - 1))
            out.append(" ")
            more_digits(sub_len, sub_len)
        else:
            # Leading digit of the lower bound, followed by anything from
            # from_sub up to all nines.
            out.extend(("[", from_str[i], "] ", "("))
            uniform_range(from_sub, sub_nines)
            out.append(")")
            if ord(from_str[i]) < ord(to_str[i]) - 1:
                # Leading digits strictly between both bounds.
                out.append(" | ")
                if to_sub == sub_nines:
                    digit_range(chr(ord(from_str[i]) + 1), to_str[i])
                    to_reached = True
                else:
                    digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1))
                out.append(" ")
                more_digits(sub_len, sub_len)
        if not to_reached:
            # Leading digit of the upper bound, followed by zeros..to_sub.
            out.append(" | ")
            digit_range(to_str[i], to_str[i])
            out.append(" ")
            uniform_range(sub_zeros, to_sub)
        out.append(")")

    if has_min and has_max:
        if min_value < 0 and max_value < 0:
            # Both negative: emit the sign, then the mirrored positive range.
            out.append("\"-\" (")
            _generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True)
            out.append(")")
            return

        if min_value < 0:
            # Range straddles zero: negative part first, then 0..max_value.
            out.append("\"-\" (")
            _generate_min_max_int(0, -min_value, out, decimals_left, top_level=True)
            out.append(") | ")
            min_value = 0

        min_s = str(min_value)
        max_s = str(max_value)
        min_digits = len(min_s)
        max_digits = len(max_s)

        # One alternative per digit count below max_digits, then the final
        # alternative with as many digits as max_value.
        for digits in range(min_digits, max_digits):
            uniform_range(min_s, "9" * digits)
            min_s = "1" + "0" * digits
            out.append(" | ")
        uniform_range(min_s, max_s)
        return

    less_decimals = max(decimals_left - 1, 1)

    if has_min:
        if min_value < 0:
            out.append("\"-\" (")
            _generate_min_max_int(None, -min_value, out, decimals_left, top_level=False)
            out.append(") | [0] | [1-9] ")
            more_digits(0, decimals_left - 1)
        elif min_value == 0:
            if top_level:
                out.append("[0] | [1-9] ")
                more_digits(0, less_decimals)
            else:
                more_digits(1, decimals_left)
        elif min_value <= 9:
            c = str(min_value)
            range_start = '1' if top_level else '0'
            if c > range_start:
                # A smaller first digit is fine as long as more digits follow.
                digit_range(range_start, chr(ord(c) - 1))
                out.append(" ")
                more_digits(1, less_decimals)
                out.append(" | ")
            digit_range(c, "9")
            out.append(" ")
            more_digits(0, less_decimals)
        else:
            min_s = str(min_value)
            length = len(min_s)
            c = min_s[0]

            if c > "1":
                digit_range("1" if top_level else "0", chr(ord(c) - 1))
                out.append(" ")
                more_digits(length, less_decimals)
                out.append(" | ")
            digit_range(c, c)
            out.append(" (")
            # NOTE(review): int() drops leading zeros of the remaining digits,
            # so e.g. a lone minimum of 100 yields `[1] ([0-9]{1,15}) | ...`,
            # which also matches "10". Left as is so the emitted text stays
            # identical to the parallel JavaScript port.
            _generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False)
            out.append(")")
            if c < "9":
                out.append(" | ")
                digit_range(chr(ord(c) + 1), "9")
                out.append(" ")
                more_digits(length - 1, less_decimals)
        return

    if has_max:
        if max_value >= 0:
            if top_level:
                # Any negative number satisfies a non-negative maximum.
                out.append("\"-\" [1-9] ")
                more_digits(0, less_decimals)
                out.append(" | ")
            _generate_min_max_int(0, max_value, out, decimals_left, top_level=True)
        else:
            out.append("\"-\" (")
            _generate_min_max_int(-max_value, None, out, decimals_left, top_level=False)
            out.append(")")
        return

    raise RuntimeError("At least one of min_value or max_value must be set")
|
||||
|
||||
class BuiltinRule:
|
||||
def __init__(self, content: str, deps: list = None):
|
||||
|
@ -432,6 +596,24 @@ class SchemaConverter:
|
|||
|
||||
return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')
|
||||
|
||||
elif schema_type in (None, 'integer') and \
|
||||
('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema):
|
||||
min_value = None
|
||||
max_value = None
|
||||
if 'minimum' in schema:
|
||||
min_value = schema['minimum']
|
||||
elif 'exclusiveMinimum' in schema:
|
||||
min_value = schema['exclusiveMinimum'] + 1
|
||||
if 'maximum' in schema:
|
||||
max_value = schema['maximum']
|
||||
elif 'exclusiveMaximum' in schema:
|
||||
max_value = schema['exclusiveMaximum'] - 1
|
||||
|
||||
out = ["("]
|
||||
_generate_min_max_int(min_value, max_value, out)
|
||||
out.append(") space")
|
||||
return self._add_rule(rule_name, ''.join(out))
|
||||
|
||||
elif (schema_type == 'object') or (len(schema) == 0):
|
||||
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||
|
||||
|
|
|
@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
|||
return minItems === 0 ? `(${result})?` : result;
|
||||
}
|
||||
|
||||
/**
 * Pushes grammar-rule fragments onto `out` that match integers within the
 * given inclusive bounds. Pass `null` for a missing bound; at least one of
 * the two bounds must be set, otherwise an Error is thrown.
 *
 * `decimalsLeft` feeds the `{m,n}` digit repetitions emitted for an unbounded
 * side; `topLevel` controls whether the first digit of a multi-digit
 * alternative may be `0` (nested calls describing trailing digits pass false).
 */
function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
  const boundedBelow = minValue !== null;
  const boundedAbove = maxValue !== null;

  // Character whose code is `delta` away from the one at `str[idx]`.
  const shifted = (str, idx, delta) => String.fromCharCode(str.charCodeAt(idx) + delta);

  // Single-digit character class: "[d]" or "[a-b]".
  const digitRange = (lo, hi) => {
    if (lo === hi) {
      out.push("[", lo, "]");
    } else {
      out.push("[", lo, "-", hi, "]");
    }
  };

  // "[0-9]" plus a "{min,max}" repetition (omitted for exactly one digit).
  const moreDigits = (atLeast, atMost) => {
    out.push("[0-9]");
    if (atLeast === 1 && atMost === 1) {
      return;
    }
    out.push("{", atLeast.toString());
    if (atMost !== atLeast) {
      out.push(",");
      if (atMost !== Number.MAX_SAFE_INTEGER) {
        out.push(atMost.toString());
      }
    }
    out.push("}");
  };

  // Pattern for the digit strings between `lower` and `upper`; call sites
  // pass two strings of the same length.
  const uniformRange = (lower, upper) => {
    let pos = 0;
    while (pos < lower.length && lower[pos] === upper[pos]) {
      pos += 1;
    }
    if (pos > 0) {
      // Shared prefix goes out once as a quoted literal.
      out.push("\"", lower.slice(0, pos), "\"");
    }
    if (pos >= lower.length) {
      return;
    }
    if (pos > 0) {
      out.push(" ");
    }

    const tailLen = lower.length - pos - 1;
    if (tailLen <= 0) {
      // Only the last digit differs.
      out.push("[", lower[pos], "-", upper[pos], "]");
      return;
    }

    const lowerTail = lower.slice(pos + 1);
    const upperTail = upper.slice(pos + 1);
    const allZeros = "0".repeat(tailLen);
    const allNines = "9".repeat(tailLen);

    let upperCovered = false;
    out.push("(");
    if (lowerTail === allZeros) {
      digitRange(lower[pos], shifted(upper, pos, -1));
      out.push(" ");
      moreDigits(tailLen, tailLen);
    } else {
      out.push("[", lower[pos], "] ", "(");
      uniformRange(lowerTail, allNines);
      out.push(")");
      if (lower.charCodeAt(pos) < upper.charCodeAt(pos) - 1) {
        // Leading digits strictly between both bounds.
        out.push(" | ");
        if (upperTail === allNines) {
          digitRange(shifted(lower, pos, 1), upper[pos]);
          upperCovered = true;
        } else {
          digitRange(shifted(lower, pos, 1), shifted(upper, pos, -1));
        }
        out.push(" ");
        moreDigits(tailLen, tailLen);
      }
    }
    if (!upperCovered) {
      out.push(" | ");
      digitRange(upper[pos], upper[pos]);
      out.push(" ");
      uniformRange(allZeros, upperTail);
    }
    out.push(")");
  };

  if (boundedBelow && boundedAbove) {
    if (minValue < 0 && maxValue < 0) {
      // Both negative: sign first, then the mirrored positive range.
      out.push("\"-\" (");
      _generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
      out.push(")");
      return;
    }

    if (minValue < 0) {
      // Range straddles zero: negative part first, then 0..maxValue.
      out.push("\"-\" (");
      _generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
      out.push(") | ");
      minValue = 0;
    }

    let lowerText = minValue.toString();
    const upperText = maxValue.toString();
    const upperWidth = upperText.length;

    // One alternative per digit count below upperWidth, then the last one.
    for (let width = lowerText.length; width < upperWidth; width++) {
      uniformRange(lowerText, "9".repeat(width));
      lowerText = "1" + "0".repeat(width);
      out.push(" | ");
    }
    uniformRange(lowerText, upperText);
    return;
  }

  const lessDecimals = Math.max(decimalsLeft - 1, 1);

  if (boundedBelow) {
    if (minValue < 0) {
      out.push("\"-\" (");
      _generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
      out.push(") | [0] | [1-9] ");
      moreDigits(0, decimalsLeft - 1);
      return;
    }

    if (minValue === 0) {
      if (topLevel) {
        out.push("[0] | [1-9] ");
        moreDigits(0, lessDecimals);
      } else {
        moreDigits(1, decimalsLeft);
      }
      return;
    }

    if (minValue <= 9) {
      const digit = minValue.toString();
      const firstAllowed = topLevel ? '1' : '0';
      if (digit > firstAllowed) {
        // A smaller first digit is fine as long as more digits follow.
        digitRange(firstAllowed, shifted(digit, 0, -1));
        out.push(" ");
        moreDigits(1, lessDecimals);
        out.push(" | ");
      }
      digitRange(digit, "9");
      out.push(" ");
      moreDigits(0, lessDecimals);
      return;
    }

    const text = minValue.toString();
    const width = text.length;
    const lead = text[0];

    if (lead > "1") {
      digitRange(topLevel ? "1" : "0", shifted(lead, 0, -1));
      out.push(" ");
      moreDigits(width, lessDecimals);
      out.push(" | ");
    }
    digitRange(lead, lead);
    out.push(" (");
    _generateMinMaxInt(parseInt(text.slice(1)), null, out, lessDecimals, false);
    out.push(")");
    if (lead < "9") {
      out.push(" | ");
      digitRange(shifted(lead, 0, 1), "9");
      out.push(" ");
      moreDigits(width - 1, lessDecimals);
    }
    return;
  }

  if (boundedAbove) {
    if (maxValue >= 0) {
      if (topLevel) {
        // Any negative number satisfies a non-negative maximum.
        out.push("\"-\" [1-9] ");
        moreDigits(0, lessDecimals);
        out.push(" | ");
      }
      _generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
    } else {
      out.push("\"-\" (");
      _generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
      out.push(")");
    }
    return;
  }

  throw new Error("At least one of minValue or maxValue must be set");
}
|
||||
|
||||
class BuiltinRule {
|
||||
constructor(content, deps) {
|
||||
this.content = content;
|
||||
|
@ -435,6 +630,24 @@ export class SchemaConverter {
|
|||
const minLen = schema.minLength || 0;
|
||||
const maxLen = schema.maxLength;
|
||||
return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
|
||||
} else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
|
||||
let minValue = null;
|
||||
let maxValue = null;
|
||||
if ('minimum' in schema) {
|
||||
minValue = schema.minimum;
|
||||
} else if ('exclusiveMinimum' in schema) {
|
||||
minValue = schema.exclusiveMinimum + 1;
|
||||
}
|
||||
if ('maximum' in schema) {
|
||||
maxValue = schema.maximum;
|
||||
} else if ('exclusiveMaximum' in schema) {
|
||||
maxValue = schema.exclusiveMaximum - 1;
|
||||
}
|
||||
|
||||
const out = ["("];
|
||||
_generateMinMaxInt(minValue, maxValue, out);
|
||||
out.push(") space");
|
||||
return this._addRule(ruleName, out.join(''));
|
||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue