json: fix additionalProperties default, uncomment tests

This commit is contained in:
ochafik 2024-06-22 20:52:02 +01:00
parent 6c859ee422
commit 2f1a087c6b
5 changed files with 31 additions and 40 deletions

View file

@ -401,6 +401,8 @@ private:
std::map<char, TrieNode> children; std::map<char, TrieNode> children;
bool is_end_of_string; bool is_end_of_string;
TrieNode() : is_end_of_string(false) {}
void insert(const std::string & string) { void insert(const std::string & string) {
auto node = this; auto node = this;
for (char c : string) { for (char c : string) {
@ -491,7 +493,7 @@ private:
} }
prop_names.push_back(prop_name); prop_names.push_back(prop_name);
} }
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) { if (additional_properties.is_null() || additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
std::string value_rule = std::string value_rule =
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
@ -695,7 +697,7 @@ public:
return _add_rule(rule_name, return _add_rule(rule_name,
_build_object_rule( _build_object_rule(
properties, required, name, properties, required, name,
schema.contains("additionalProperties") ? schema["additionalProperties"] : json::object())); schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
std::unordered_set<std::string> required; std::unordered_set<std::string> required;
std::vector<std::pair<std::string, json>> properties; std::vector<std::pair<std::string, json>> properties;

View file

@ -4,7 +4,7 @@ import itertools
import json import json
import re import re
import sys import sys
from typing import Any, Dict, List, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Tuple, Union
def _build_repetition(item_rule, min_items, max_items, separator_rule=None): def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
@ -416,10 +416,7 @@ class SchemaConverter:
('additionalProperties' in schema and schema['additionalProperties'] is not True)): ('additionalProperties' in schema and schema['additionalProperties'] is not True)):
required = set(schema.get('required', [])) required = set(schema.get('required', []))
properties = list(schema.get('properties', {}).items()) properties = list(schema.get('properties', {}).items())
additional_properties = schema.get('additionalProperties', True) return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
if additional_properties is None:
additional_properties = True
return self._add_rule(rule_name, self._build_object_rule(properties, required, name, additional_properties))
elif schema_type in (None, 'object') and 'allOf' in schema: elif schema_type in (None, 'object') and 'allOf' in schema:
required = set() required = set()
@ -498,7 +495,7 @@ class SchemaConverter:
self._add_primitive(dep, dep_rule) self._add_primitive(dep, dep_rule)
return n return n
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
prop_order = self._prop_order prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order # sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@ -513,9 +510,10 @@ class SchemaConverter:
required_props = [k for k in sorted_props if k in required] required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required] optional_props = [k for k in sorted_props if k not in required]
if additional_properties == True or isinstance(additional_properties, dict): if additional_properties != False:
sub_name = f'{name}{"-" if name else ""}additional' sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
self._add_primitive('value', PRIMITIVE_RULES['value'])
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))

View file

@ -432,11 +432,7 @@ export class SchemaConverter {
('additionalProperties' in schema && schema.additionalProperties !== true))) { ('additionalProperties' in schema && schema.additionalProperties !== true))) {
const required = new Set(schema.required || []); const required = new Set(schema.required || []);
const properties = Object.entries(schema.properties ?? {}); const properties = Object.entries(schema.properties ?? {});
let additionalProperties = schema.additionalProperties; return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
if (additionalProperties === undefined) {
additionalProperties = true;
}
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, additionalProperties));
} else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) { } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
const required = new Set(); const required = new Set();
const properties = []; const properties = [];
@ -466,7 +462,7 @@ export class SchemaConverter {
} }
} }
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false)); return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
const items = schema.items ?? schema.prefixItems; const items = schema.items ?? schema.prefixItems;
if (Array.isArray(items)) { if (Array.isArray(items)) {
@ -542,9 +538,11 @@ export class SchemaConverter {
const requiredProps = sortedProps.filter(k => required.has(k)); const requiredProps = sortedProps.filter(k => required.has(k));
const optionalProps = sortedProps.filter(k => !required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k));
if (typeof additionalProperties === 'object' || additionalProperties === true) { if (additionalProperties !== false) {
const subName = `${name ?? ''}${name ? '-' : ''}additional`; const subName = `${name ?? ''}${name ? '-' : ''}additional`;
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); const valueRule =
additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
: this._addPrimitive('value', PRIMITIVE_RULES['value']);
const key_rule = const key_rule =
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])

View file

@ -15,8 +15,6 @@
using json = nlohmann::ordered_json; using json = nlohmann::ordered_json;
//#define INCLUDE_FAILING_TESTS 1
static llama_grammar* build_grammar(const std::string & grammar_str) { static llama_grammar* build_grammar(const std::string & grammar_str) {
auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
@ -823,12 +821,8 @@ static void test_json_schema() {
// "By extension, even an empty object is valid" // "By extension, even an empty object is valid"
R"""({})""", R"""({})""",
// "By default, providing additional properties is valid" // "By default, providing additional properties is valid"
#ifdef INCLUDE_FAILING_TESTS
// TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
// TODO: Spaces should be permitted around enum values, but currently they fail to pass.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
#endif
}, },
// Failing strings // Failing strings
{ {
@ -861,20 +855,13 @@ static void test_json_schema() {
{ {
// "By extension, even an empty object is valid" // "By extension, even an empty object is valid"
R"""({})""", R"""({})""",
#ifdef INCLUDE_FAILING_TESTS
// TODO: Following line should pass and doesn't
R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
// "By default, leaving out properties is valid" // "By default, leaving out properties is valid"
// TODO: Following line should pass and doesn't
R"""({ "street_name": "Pennsylvania" })""", R"""({ "street_name": "Pennsylvania" })""",
// TODO: Following line should pass and doesn't
R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
// "By default, providing additional properties is valid" // "By default, providing additional properties is valid"
// TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
// TODO: Spaces should be permitted around enum values, but currently they fail to pass.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
#endif
}, },
// Failing strings // Failing strings
{ {
@ -906,10 +893,8 @@ static void test_json_schema() {
R"""({ "number": 1600, "street_type":"Avenue"})""", R"""({ "number": 1600, "street_type":"Avenue"})""",
R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
#ifdef INCLUDE_FAILING_TESTS // Spaces are permitted around enum values
// TODO: Spaces should be permitted around enum values, but currently they fail to pass.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
#endif
}, },
// Failing strings // Failing strings
{ {

View file

@ -827,8 +827,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
array ::= "[" space ( value ("," space value)* )? "]" space array ::= "[" space ( value ("," space value)* )? "]" space
bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space
bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space
bar-additional-kv ::= bar-additional-k ":" space bar-additional-value bar-additional-kv ::= bar-additional-k ":" space value
bar-additional-value ::= object
bar-b-kv ::= "\"b\"" space ":" space number bar-b-kv ::= "\"b\"" space ":" space number
bar-b-rest ::= ( "," space bar-additional-kv )* bar-b-rest ::= ( "," space bar-additional-kv )*
boolean ::= ("true" | "false") space boolean ::= ("true" | "false") space
@ -838,8 +837,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
foo-a-kv ::= "\"a\"" space ":" space number foo-a-kv ::= "\"a\"" space ":" space number
foo-a-rest ::= ( "," space foo-additional-kv )* foo-a-rest ::= ( "," space foo-additional-kv )*
foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
foo-additional-kv ::= foo-additional-k ":" space foo-additional-value foo-additional-kv ::= foo-additional-k ":" space value
foo-additional-value ::= object
integral-part ::= [0] | [1-9] [0-9]{0,15} integral-part ::= [0] | [1-9] [0-9]{0,15}
null ::= "null" space null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
@ -883,15 +881,25 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""", })""",
R"""( R"""(
a-kv ::= "\"a\"" space ":" space number a-kv ::= "\"a\"" space ":" space number
additional-k ::= ["] ( [a] char+ | [b] char+ | [c] char+ | [d] char+ | [^"abcd] char* )? ["] space
additional-kv ::= additional-k ":" space value
array ::= "[" space ( value ("," space value)* )? "]" space
b-kv ::= "\"b\"" space ":" space number b-kv ::= "\"b\"" space ":" space number
boolean ::= ("true" | "false") space
c-kv ::= "\"c\"" space ":" space number c-kv ::= "\"c\"" space ":" space number
c-rest ::= ( "," space additional-kv )*
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
d-kv ::= "\"d\"" space ":" space number d-kv ::= "\"d\"" space ":" space number
d-rest ::= ( "," space c-kv )? d-rest ::= ( "," space c-kv )? c-rest
decimal-part ::= [0-9]{1,16} decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15} integral-part ::= [0] | [1-9] [0-9]{0,15}
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv c-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20} space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)""" )"""
}); });