json: support mix of additional props & required/optional

This commit is contained in:
ochafik 2024-03-16 11:13:29 +00:00
parent f30d6c27b9
commit 391b17e7f6
6 changed files with 692 additions and 595 deletions

View file

@ -4,7 +4,7 @@ import itertools
import json import json
import re import re
import sys import sys
from typing import Any, Dict, List, Set, Tuple from typing import Any, Dict, List, Set, Tuple, Union
# whitespace is constrained to a single space char to prevent model "running away" in # whitespace is constrained to a single space char to prevent model "running away" in
# whitespace. Also maybe improves generation quality? # whitespace. Also maybe improves generation quality?
@ -320,10 +320,10 @@ class SchemaConverter:
rule = ' | '.join((self._format_literal(v) for v in schema['enum'])) rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
return self._add_rule(rule_name, rule) return self._add_rule(rule_name, rule)
elif schema_type in (None, 'object') and 'properties' in schema: elif schema_type in (None, 'object') and ('properties' in schema or 'additionalProperties' in schema):
required = set(schema.get('required', [])) required = set(schema.get('required', []))
properties = list(schema['properties'].items()) properties = list(schema.get('properties', {}).items())
return self._add_rule(rule_name, self._build_object_rule(properties, required, name)) return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
elif schema_type in (None, 'object') and 'allOf' in schema: elif schema_type in (None, 'object') and 'allOf' in schema:
required = set() required = set()
@ -346,19 +346,7 @@ class SchemaConverter:
else: else:
add_component(t, is_required=True) add_component(t, is_required=True)
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name)) return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
elif schema_type in (None, 'object') and 'additionalProperties' in schema:
additional_properties = schema['additionalProperties']
if not isinstance(additional_properties, dict):
additional_properties = {}
sub_name = f'{name}{"-" if name else ""}additionalProperties'
value_rule = self.visit(additional_properties, f'{sub_name}-value')
kv_rule = self._add_rule(f'{sub_name}-kv', f'string ":" space {value_rule}')
return self._add_rule(
rule_name,
f'( {kv_rule} ( "," space {kv_rule} )* )*')
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema): elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
items = schema.get('items') or schema['prefixItems'] items = schema.get('items') or schema['prefixItems']
@ -417,7 +405,7 @@ class SchemaConverter:
PRIMITIVE_RULES[schema_type] PRIMITIVE_RULES[schema_type]
) )
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str): def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
prop_order = self._prop_order prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order # sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@ -429,10 +417,15 @@ class SchemaConverter:
f'{name}{"-" if name else ""}{prop_name}-kv', f'{name}{"-" if name else ""}{prop_name}-kv',
fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}' fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}'
) )
required_props = [k for k in sorted_props if k in required] required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required] optional_props = [k for k in sorted_props if k not in required]
if additional_properties:
sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit(additional_properties, f'{sub_name}-value')
prop_kv_rule_names["*"] = self._add_rule(f'{sub_name}-kv', f'string ":" space {value_rule}')
optional_props.append("*")
rule = '"{" space ' rule = '"{" space '
rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props) rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props)
@ -444,7 +437,12 @@ class SchemaConverter:
def get_recursive_refs(ks, first_is_optional): def get_recursive_refs(ks, first_is_optional):
[k, *rest] = ks [k, *rest] = ks
kv_rule_name = prop_kv_rule_names[k] kv_rule_name = prop_kv_rule_names[k]
if first_is_optional: if k == '*':
res = self._add_rule(
f'{name}{"-" if name else ""}additional-kvs',
f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
)
elif first_is_optional:
res = f'( "," space {kv_rule_name} )?' res = f'( "," space {kv_rule_name} )?'
else: else:
res = kv_rule_name res = kv_rule_name

View file

@ -388,7 +388,12 @@ private:
return ref_name; return ref_name;
} }
string _build_object_rule(const vector<pair<string, json>>& properties, const unordered_set<string>& required, const string& name) { string _build_object_rule(
const vector<pair<string, json>>& properties,
const unordered_set<string>& required,
const string& name,
const json& additional_properties)
{
vector<string> required_props; vector<string> required_props;
vector<string> optional_props; vector<string> optional_props;
unordered_map<string, string> prop_kv_rule_names; unordered_map<string, string> prop_kv_rule_names;
@ -407,6 +412,13 @@ private:
optional_props.push_back(prop_name); optional_props.push_back(prop_name);
} }
} }
if (additional_properties.is_object()) {
string sub_name = name + (name.empty() ? "" : "-") + "additional";
string value_rule = visit(additional_properties, sub_name + "-value");
string kv_rule = _add_rule(sub_name + "-kv", "string \":\" space " + value_rule);
prop_kv_rule_names["*"] = kv_rule;
optional_props.push_back("*");
}
string rule = "\"{\" space "; string rule = "\"{\" space ";
for (size_t i = 0; i < required_props.size(); i++) { for (size_t i = 0; i < required_props.size(); i++) {
@ -429,7 +441,12 @@ private:
} }
string k = ks[0]; string k = ks[0];
string kv_rule_name = prop_kv_rule_names[k]; string kv_rule_name = prop_kv_rule_names[k];
if (first_is_optional) { if (k == "*") {
res = _add_rule(
name + (name.empty() ? "" : "-") + "additional-kvs",
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
);
} else if (first_is_optional) {
res = "( \",\" space " + kv_rule_name + " )?"; res = "( \",\" space " + kv_rule_name + " )?";
} else { } else {
res = kv_rule_name; res = kv_rule_name;
@ -554,7 +571,8 @@ public:
enum_values.push_back(_format_literal(v.dump())); enum_values.push_back(_format_literal(v.dump()));
} }
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("properties")) { } else if ((schema_type.is_null() || schema_type == "object")
&& (schema.contains("properties") || schema.contains("additionalProperties"))) {
unordered_set<string> required; unordered_set<string> required;
if (schema.contains("required") && schema["required"].is_array()) { if (schema.contains("required") && schema["required"].is_array()) {
for (const auto& item : schema["required"]) { for (const auto& item : schema["required"]) {
@ -564,10 +582,12 @@ public:
} }
} }
vector<pair<string, json>> properties; vector<pair<string, json>> properties;
if (schema.contains("properties")) {
for (const auto& prop : schema["properties"].items()) { for (const auto& prop : schema["properties"].items()) {
properties.emplace_back(prop.key(), prop.value()); properties.emplace_back(prop.key(), prop.value());
} }
return _add_rule(rule_name, _build_object_rule(properties, required, name)); }
return _add_rule(rule_name, _build_object_rule(properties, required, name, schema["additionalProperties"]));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
unordered_set<string> required; unordered_set<string> required;
vector<pair<string, json>> properties; vector<pair<string, json>> properties;
@ -595,16 +615,7 @@ public:
add_component(t, true); add_component(t, true);
} }
} }
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name)); return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("additionalProperties")) {
json additional_properties = schema["additionalProperties"];
if (!additional_properties.is_object()) {
additional_properties = json::object();
}
string sub_name = name + (name.empty() ? "" : "-") + "additionalProperties";
string value_rule = visit(additional_properties, sub_name + "-value");
string kv_rule = _add_rule(sub_name + "-kv", "string \":\" space " + value_rule);
return _add_rule(rule_name, "( " + kv_rule + " ( \",\" space " + kv_rule + " )* )*");
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
if (items.is_array()) { if (items.is_array()) {
@ -656,7 +667,7 @@ public:
} }
return schema_format + "-string"; return schema_format + "-string";
} else { } else {
if (PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) { if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
_errors.push_back("Unrecognized schema: " + schema.dump()); _errors.push_back("Unrecognized schema: " + schema.dump());
return ""; return "";
} }

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -342,10 +342,10 @@ export class SchemaConverter {
} else if ('enum' in schema) { } else if ('enum' in schema) {
const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | '); const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
return this._addRule(ruleName, rule); return this._addRule(ruleName, rule);
} else if ((schemaType === undefined || schemaType === 'object') && 'properties' in schema) { } else if ((schemaType === undefined || schemaType === 'object') && ('properties' in schema || 'additionalProperties' in schema)) {
const required = new Set(schema.required || []); const required = new Set(schema.required || []);
const properties = Object.entries(schema.properties); const properties = Object.entries(schema.properties ?? {});
return this._addRule(ruleName, this._buildObjectRule(properties, required, name)); return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
} else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) { } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
const required = new Set(); const required = new Set();
const properties = []; const properties = [];
@ -375,14 +375,7 @@ export class SchemaConverter {
} }
} }
return this._addRule(ruleName, this._buildObjectRule(properties, required, name)); return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
} else if ((schemaType === undefined || schemaType === 'object') && 'additionalProperties' in schema) {
const additionalProperties = typeof schema.additionalProperties === 'object' ? schema.additionalProperties : {};
const subName = `${name ?? ''}${name ? '-' : ''}additionalProperties`;
const valueRule = this.visit(additionalProperties, `${subName}-value`);
const kvRule = this._addRule(`${subName}-kv`, `string ":" space ${valueRule}`);
return this._addRule(ruleName, `( ${kvRule} ( "," space ${kvRule} )* )*`);
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
const items = schema.items ?? schema.prefixItems; const items = schema.items ?? schema.prefixItems;
if (Array.isArray(items)) { if (Array.isArray(items)) {
@ -438,7 +431,7 @@ export class SchemaConverter {
} }
} }
_buildObjectRule(properties, required, name) { _buildObjectRule(properties, required, name, additionalProperties) {
const propOrder = this._propOrder; const propOrder = this._propOrder;
// sort by position in prop_order (if specified) then by original order // sort by position in prop_order (if specified) then by original order
const sortedProps = properties.map(([k]) => k).sort((a, b) => { const sortedProps = properties.map(([k]) => k).sort((a, b) => {
@ -458,10 +451,16 @@ export class SchemaConverter {
`${this._formatLiteral(propName)} space ":" space ${propRuleName}` `${this._formatLiteral(propName)} space ":" space ${propRuleName}`
); );
} }
const requiredProps = sortedProps.filter(k => required.has(k)); const requiredProps = sortedProps.filter(k => required.has(k));
const optionalProps = sortedProps.filter(k => !required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k));
if (typeof additionalProperties === 'object') {
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
const valueRule = this.visit(additionalProperties, `${subName}-value`);
propKvRuleNames['*'] = this._addRule(`${subName}-kv`, `string ":" space ${valueRule}`);
optionalProps.push('*');
}
let rule = '"{" space '; let rule = '"{" space ';
rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space '); rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
@ -474,7 +473,17 @@ export class SchemaConverter {
const getRecursiveRefs = (ks, firstIsOptional) => { const getRecursiveRefs = (ks, firstIsOptional) => {
const [k, ...rest] = ks; const [k, ...rest] = ks;
const kvRuleName = propKvRuleNames[k]; const kvRuleName = propKvRuleNames[k];
let res = firstIsOptional ? `( "," space ${kvRuleName} )?` : kvRuleName; let res;
if (k === '*') {
res = this._addRule(
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
)
} else if (firstIsOptional) {
res = `( "," space ${kvRuleName} )?`;
} else {
res = kvRuleName;
}
if (rest.length > 0) { if (rest.length > 0) {
res += ' ' + this._addRule( res += ' ' + this._addRule(
`${name ?? ''}${name ? '-' : ''}${k}-rest`, `${name ?? ''}${name ? '-' : ''}${k}-rest`,

View file

@ -290,67 +290,22 @@ static void test(const string& lang, std::function<void(const TestCase&)> runner
}); });
run({ run({
"N optionals", "N optional props",
R"""({ R"""({
"type": "object",
"properties": { "properties": {
"a": { "a": {"type": "string"},
"type": "string" "b": {"type": "string"},
"c": {"type": "string"}
}, },
"b": { "additionalProperties": false
"type": "string"
},
"c": {
"type": [
"number",
"string"
]
},
"d": {
"type": "string"
},
"e": {
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "array",
"minItems": 2,
"items": [
{
"type": "string"
},
{
"type": "number"
}
],
"maxItems": 2
}
}
}
},
"required": [
"a",
"b"
],
"additionalProperties": false,
"definitions": {}
})""", })""",
R"""( R"""(
a-kv ::= "\"a\"" space ":" space string a-kv ::= "\"a\"" space ":" space string
a-rest ::= ( "," space b-kv )? b-rest
b-kv ::= "\"b\"" space ":" space string b-kv ::= "\"b\"" space ":" space string
c ::= number | string b-rest ::= ( "," space c-kv )?
c-kv ::= "\"c\"" space ":" space c c-kv ::= "\"c\"" space ":" space string
c-rest ::= ( "," space d-kv )? d-rest root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
d-kv ::= "\"d\"" space ":" space string
d-rest ::= ( "," space e-kv )?
e ::= ( e-additionalProperties-kv ( "," space e-additionalProperties-kv )* )*
e-additionalProperties-kv ::= string ":" space e-additionalProperties-value
e-additionalProperties-value ::= "[" space ( e-additionalProperties-value-item ( "," space e-additionalProperties-value-item )* )? "]" space
e-additionalProperties-value-item ::= "[" space string "," space number "]" space
e-kv ::= "\"e\"" space ":" space e
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv d-rest | e-kv ) )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" (
[^"\\] | [^"\\] |
@ -359,6 +314,116 @@ static void test(const string& lang, std::function<void(const TestCase&)> runner
)""" )"""
}); });
run({
"required + optional props",
R"""({
"properties": {
"a": {"type": "string"},
"b": {"type": "string"},
"c": {"type": "string"},
"d": {"type": "string"}
},
"required": ["a", "b"],
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string
c-rest ::= ( "," space d-kv )?
d-kv ::= "\"d\"" space ":" space string
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
run({
"additional props",
R"""({
"type": "object",
"additionalProperties": {"type": "array", "items": {"type": "number"}}
})""",
R"""(
additional-kv ::= string ":" space additional-value
additional-kvs ::= additional-kv ( "," space additional-kv )*
additional-value ::= "[" space ( number ( "," space number )* )? "]" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space (additional-kvs )? "}" space
space ::= " "?
)"""
});
run({
"required + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"required": ["a"],
"additionalProperties": {"type": "string"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
additional-kv ::= string ":" space string
additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
run({
"optional + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"additionalProperties": {"type": "number"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
a-rest ::= additional-kvs
additional-kv ::= string ":" space number
additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
space ::= " "?
)"""
});
run({
"required + optional + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"},
"b": {"type": "number"}
},
"required": ["a"],
"additionalProperties": {"type": "number"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
additional-kv ::= string ":" space number
additional-kvs ::= additional-kv ( "," space additional-kv )*
b-kv ::= "\"b\"" space ":" space number
b-rest ::= additional-kvs
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
space ::= " "?
)"""
});
run({ run({
"top-level $ref", "top-level $ref",
R"""({ R"""({
@ -451,4 +516,3 @@ int main() {
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema))); tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
}); });
} }