json: fix additionalProperties, allow space after enum/const (#7840)

* json: default additionalProperty to true

* json: don't force additional props after normal properties!

* json: allow space after enum/const

* json: update pydantic example to set additionalProperties: false

* json: prevent additional props to redefine a typed prop

* port not_strings to python, add trailing space

* fix not_strings & port to js+py

* Update json-schema-to-grammar.cpp

* fix _not_strings for substring overlaps

* json: fix additionalProperties default, uncomment tests

* json: add integ. test case for additionalProperties

* json: nit: simplify condition

* reformat grammar integ tests w/ R"""()""" strings where there's escapes

* update # tokens in server test: consts can now have trailing space
This commit is contained in:
Olivier Chafik 2024-06-26 01:45:58 +01:00 committed by GitHub
parent 163d50adaf
commit 6777c544bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 497 additions and 245 deletions

View file

@ -4,8 +4,7 @@ import itertools
import json
import re
import sys
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing import Any, List, Optional, Set, Tuple, Union
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
@ -276,6 +275,51 @@ class SchemaConverter:
return ''.join(('(', *recurse(0), ')'))
def _not_strings(self, strings):
class TrieNode:
def __init__(self):
self.children = {}
self.is_end_of_string = False
def insert(self, string):
node = self
for c in string:
node = node.children.setdefault(c, TrieNode())
node.is_end_of_string = True
trie = TrieNode()
for s in strings:
trie.insert(s)
char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
out = ['["] ( ']
def visit(node):
rejects = []
first = True
for c in sorted(node.children.keys()):
child = node.children[c]
rejects.append(c)
if first:
first = False
else:
out.append(' | ')
out.append(f'[{c}]')
if child.children:
out.append(f' (')
visit(child)
out.append(')')
elif child.is_end_of_string:
out.append(f' {char_rule}+')
if node.children:
if not first:
out.append(' | ')
out.append(f'[^"{"".join(rejects)}] {char_rule}*')
visit(trie)
out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')
return ''.join(out)
def _add_rule(self, name, rule):
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
if esc_name not in self._rules or self._rules[esc_name] == rule:
@ -524,10 +568,10 @@ class SchemaConverter:
return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
elif 'const' in schema:
return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
elif 'enum' in schema:
rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
return self._add_rule(rule_name, rule)
elif schema_type in (None, 'object') and \
@ -632,7 +676,7 @@ class SchemaConverter:
self._add_primitive(dep, dep_rule)
return n
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@ -647,12 +691,16 @@ class SchemaConverter:
required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required]
if additional_properties == True or isinstance(additional_properties, dict):
if additional_properties != False:
sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
self._add_primitive('value', PRIMITIVE_RULES['value'])
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
prop_kv_rule_names["*"] = self._add_rule(
f'{sub_name}-kv',
self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
f'{key_rule} ":" space {value_rule}'
)
optional_props.append("*")
@ -667,15 +715,11 @@ class SchemaConverter:
def get_recursive_refs(ks, first_is_optional):
[k, *rest] = ks
kv_rule_name = prop_kv_rule_names[k]
if k == '*':
res = self._add_rule(
f'{name}{"-" if name else ""}additional-kvs',
f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
)
elif first_is_optional:
res = f'( "," space {kv_rule_name} )?'
comma_ref = f'( "," space {kv_rule_name} )'
if first_is_optional:
res = comma_ref + ('*' if k == '*' else '?')
else:
res = kv_rule_name
res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
if len(rest) > 0:
res += ' ' + self._add_rule(
f'{name}{"-" if name else ""}{k}-rest',