json: support mix of additional props & required/optional

This commit is contained in:
ochafik 2024-03-16 11:13:29 +00:00
parent f30d6c27b9
commit 391b17e7f6
6 changed files with 692 additions and 595 deletions

View file

@ -4,7 +4,7 @@ import itertools
import json
import re
import sys
from typing import Any, Dict, List, Set, Tuple
from typing import Any, Dict, List, Set, Tuple, Union
# whitespace is constrained to a single space char to prevent model "running away" in
# whitespace. Also maybe improves generation quality?
@ -320,10 +320,10 @@ class SchemaConverter:
rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
return self._add_rule(rule_name, rule)
elif schema_type in (None, 'object') and 'properties' in schema:
elif schema_type in (None, 'object') and ('properties' in schema or 'additionalProperties' in schema):
required = set(schema.get('required', []))
properties = list(schema['properties'].items())
return self._add_rule(rule_name, self._build_object_rule(properties, required, name))
properties = list(schema.get('properties', {}).items())
return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
elif schema_type in (None, 'object') and 'allOf' in schema:
required = set()
@ -346,19 +346,7 @@ class SchemaConverter:
else:
add_component(t, is_required=True)
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name))
elif schema_type in (None, 'object') and 'additionalProperties' in schema:
additional_properties = schema['additionalProperties']
if not isinstance(additional_properties, dict):
additional_properties = {}
sub_name = f'{name}{"-" if name else ""}additionalProperties'
value_rule = self.visit(additional_properties, f'{sub_name}-value')
kv_rule = self._add_rule(f'{sub_name}-kv', f'string ":" space {value_rule}')
return self._add_rule(
rule_name,
f'( {kv_rule} ( "," space {kv_rule} )* )*')
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
items = schema.get('items') or schema['prefixItems']
@ -417,7 +405,7 @@ class SchemaConverter:
PRIMITIVE_RULES[schema_type]
)
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str):
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@ -429,10 +417,15 @@ class SchemaConverter:
f'{name}{"-" if name else ""}{prop_name}-kv',
fr'{self._format_literal(prop_name)} space ":" space {prop_rule_name}'
)
required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required]
if additional_properties:
sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit(additional_properties, f'{sub_name}-value')
prop_kv_rule_names["*"] = self._add_rule(f'{sub_name}-kv', f'string ":" space {value_rule}')
optional_props.append("*")
rule = '"{" space '
rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props)
@ -444,7 +437,12 @@ class SchemaConverter:
def get_recursive_refs(ks, first_is_optional):
[k, *rest] = ks
kv_rule_name = prop_kv_rule_names[k]
if first_is_optional:
if k == '*':
res = self._add_rule(
f'{name}{"-" if name else ""}additional-kvs',
f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
)
elif first_is_optional:
res = f'( "," space {kv_rule_name} )?'
else:
res = kv_rule_name

View file

@ -388,7 +388,12 @@ private:
return ref_name;
}
string _build_object_rule(const vector<pair<string, json>>& properties, const unordered_set<string>& required, const string& name) {
string _build_object_rule(
const vector<pair<string, json>>& properties,
const unordered_set<string>& required,
const string& name,
const json& additional_properties)
{
vector<string> required_props;
vector<string> optional_props;
unordered_map<string, string> prop_kv_rule_names;
@ -407,6 +412,13 @@ private:
optional_props.push_back(prop_name);
}
}
if (additional_properties.is_object()) {
string sub_name = name + (name.empty() ? "" : "-") + "additional";
string value_rule = visit(additional_properties, sub_name + "-value");
string kv_rule = _add_rule(sub_name + "-kv", "string \":\" space " + value_rule);
prop_kv_rule_names["*"] = kv_rule;
optional_props.push_back("*");
}
string rule = "\"{\" space ";
for (size_t i = 0; i < required_props.size(); i++) {
@ -429,7 +441,12 @@ private:
}
string k = ks[0];
string kv_rule_name = prop_kv_rule_names[k];
if (first_is_optional) {
if (k == "*") {
res = _add_rule(
name + (name.empty() ? "" : "-") + "additional-kvs",
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
);
} else if (first_is_optional) {
res = "( \",\" space " + kv_rule_name + " )?";
} else {
res = kv_rule_name;
@ -554,7 +571,8 @@ public:
enum_values.push_back(_format_literal(v.dump()));
}
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("properties")) {
} else if ((schema_type.is_null() || schema_type == "object")
&& (schema.contains("properties") || schema.contains("additionalProperties"))) {
unordered_set<string> required;
if (schema.contains("required") && schema["required"].is_array()) {
for (const auto& item : schema["required"]) {
@ -564,10 +582,12 @@ public:
}
}
vector<pair<string, json>> properties;
for (const auto& prop : schema["properties"].items()) {
properties.emplace_back(prop.key(), prop.value());
if (schema.contains("properties")) {
for (const auto& prop : schema["properties"].items()) {
properties.emplace_back(prop.key(), prop.value());
}
}
return _add_rule(rule_name, _build_object_rule(properties, required, name));
return _add_rule(rule_name, _build_object_rule(properties, required, name, schema["additionalProperties"]));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
unordered_set<string> required;
vector<pair<string, json>> properties;
@ -595,16 +615,7 @@ public:
add_component(t, true);
}
}
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name));
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("additionalProperties")) {
json additional_properties = schema["additionalProperties"];
if (!additional_properties.is_object()) {
additional_properties = json::object();
}
string sub_name = name + (name.empty() ? "" : "-") + "additionalProperties";
string value_rule = visit(additional_properties, sub_name + "-value");
string kv_rule = _add_rule(sub_name + "-kv", "string \":\" space " + value_rule);
return _add_rule(rule_name, "( " + kv_rule + " ( \",\" space " + kv_rule + " )* )*");
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
if (items.is_array()) {
@ -656,7 +667,7 @@ public:
}
return schema_format + "-string";
} else {
if (PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
_errors.push_back("Unrecognized schema: " + schema.dump());
return "";
}

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -342,10 +342,10 @@ export class SchemaConverter {
} else if ('enum' in schema) {
const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
return this._addRule(ruleName, rule);
} else if ((schemaType === undefined || schemaType === 'object') && 'properties' in schema) {
} else if ((schemaType === undefined || schemaType === 'object') && ('properties' in schema || 'additionalProperties' in schema)) {
const required = new Set(schema.required || []);
const properties = Object.entries(schema.properties);
return this._addRule(ruleName, this._buildObjectRule(properties, required, name));
const properties = Object.entries(schema.properties ?? {});
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
} else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
const required = new Set();
const properties = [];
@ -375,14 +375,7 @@ export class SchemaConverter {
}
}
return this._addRule(ruleName, this._buildObjectRule(properties, required, name));
} else if ((schemaType === undefined || schemaType === 'object') && 'additionalProperties' in schema) {
const additionalProperties = typeof schema.additionalProperties === 'object' ? schema.additionalProperties : {};
const subName = `${name ?? ''}${name ? '-' : ''}additionalProperties`;
const valueRule = this.visit(additionalProperties, `${subName}-value`);
const kvRule = this._addRule(`${subName}-kv`, `string ":" space ${valueRule}`);
return this._addRule(ruleName, `( ${kvRule} ( "," space ${kvRule} )* )*`);
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
const items = schema.items ?? schema.prefixItems;
if (Array.isArray(items)) {
@ -438,7 +431,7 @@ export class SchemaConverter {
}
}
_buildObjectRule(properties, required, name) {
_buildObjectRule(properties, required, name, additionalProperties) {
const propOrder = this._propOrder;
// sort by position in prop_order (if specified) then by original order
const sortedProps = properties.map(([k]) => k).sort((a, b) => {
@ -458,10 +451,16 @@ export class SchemaConverter {
`${this._formatLiteral(propName)} space ":" space ${propRuleName}`
);
}
const requiredProps = sortedProps.filter(k => required.has(k));
const optionalProps = sortedProps.filter(k => !required.has(k));
if (typeof additionalProperties === 'object') {
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
const valueRule = this.visit(additionalProperties, `${subName}-value`);
propKvRuleNames['*'] = this._addRule(`${subName}-kv`, `string ":" space ${valueRule}`);
optionalProps.push('*');
}
let rule = '"{" space ';
rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
@ -474,7 +473,17 @@ export class SchemaConverter {
const getRecursiveRefs = (ks, firstIsOptional) => {
const [k, ...rest] = ks;
const kvRuleName = propKvRuleNames[k];
let res = firstIsOptional ? `( "," space ${kvRuleName} )?` : kvRuleName;
let res;
if (k === '*') {
res = this._addRule(
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
)
} else if (firstIsOptional) {
res = `( "," space ${kvRuleName} )?`;
} else {
res = kvRuleName;
}
if (rest.length > 0) {
res += ' ' + this._addRule(
`${name ?? ''}${name ? '-' : ''}${k}-rest`,

View file

@ -290,67 +290,22 @@ static void test(const string& lang, std::function<void(const TestCase&)> runner
});
run({
"N optionals",
"N optional props",
R"""({
"type": "object",
"properties": {
"a": {
"type": "string"
},
"b": {
"type": "string"
},
"c": {
"type": [
"number",
"string"
]
},
"d": {
"type": "string"
},
"e": {
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "array",
"minItems": 2,
"items": [
{
"type": "string"
},
{
"type": "number"
}
],
"maxItems": 2
}
}
}
"a": {"type": "string"},
"b": {"type": "string"},
"c": {"type": "string"}
},
"required": [
"a",
"b"
],
"additionalProperties": false,
"definitions": {}
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
a-rest ::= ( "," space b-kv )? b-rest
b-kv ::= "\"b\"" space ":" space string
c ::= number | string
c-kv ::= "\"c\"" space ":" space c
c-rest ::= ( "," space d-kv )? d-rest
d-kv ::= "\"d\"" space ":" space string
d-rest ::= ( "," space e-kv )?
e ::= ( e-additionalProperties-kv ( "," space e-additionalProperties-kv )* )*
e-additionalProperties-kv ::= string ":" space e-additionalProperties-value
e-additionalProperties-value ::= "[" space ( e-additionalProperties-value-item ( "," space e-additionalProperties-value-item )* )? "]" space
e-additionalProperties-value-item ::= "[" space string "," space number "]" space
e-kv ::= "\"e\"" space ":" space e
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv d-rest | e-kv ) )? "}" space
b-rest ::= ( "," space c-kv )?
c-kv ::= "\"c\"" space ":" space string
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
space ::= " "?
string ::= "\"" (
[^"\\] |
@ -359,6 +314,116 @@ static void test(const string& lang, std::function<void(const TestCase&)> runner
)"""
});
// Case: object schema with no "type" key, four string properties (a, b, c, d),
// "required": ["a", "b"], and "additionalProperties": false.
//
// Expected grammar shape:
//   - root emits a-kv and b-kv unconditionally, separated by "," space;
//   - the two non-required properties form one optional, comma-led group in
//     which c may be followed by d (through c-rest) or d may appear alone;
//   - no additional-* rules are listed, matching additionalProperties: false.
run({
"required + optional props",
R"""({
"properties": {
"a": {"type": "string"},
"b": {"type": "string"},
"c": {"type": "string"},
"d": {"type": "string"}
},
"required": ["a", "b"],
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string
c-rest ::= ( "," space d-kv )?
d-kv ::= "\"d\"" space ":" space string
root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
// Case: object schema with no "properties" at all, only an
// "additionalProperties" sub-schema (array of numbers).
//
// Expected grammar shape:
//   - additional-kv is one `string ":" space additional-value` entry;
//   - additional-kvs is one or more such entries separated by "," space;
//   - root wraps additional-kvs in an optional group, so "{}" is accepted.
//
// NOTE(review): additional-kv references the `string` rule, but the expected
// grammar below lists no `string ::= ...` definition (compare the cases whose
// schemas contain a string-typed value, which do list it). Confirm whether the
// rule is meant to be emitted here or is supplied somewhere else.
run({
"additional props",
R"""({
"type": "object",
"additionalProperties": {"type": "array", "items": {"type": "number"}}
})""",
R"""(
additional-kv ::= string ":" space additional-value
additional-kvs ::= additional-kv ( "," space additional-kv )*
additional-value ::= "[" space ( number ( "," space number )* )? "]" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space (additional-kvs )? "}" space
space ::= " "?
)"""
});
// Case: one required numeric property (a) combined with an
// "additionalProperties" sub-schema of type string.
//
// Expected grammar shape:
//   - root emits a-kv unconditionally, then an optional comma-led group
//     containing additional-kvs;
//   - additional-kv uses the shared `string` rule for both the key and the
//     value, so the `string` definition is listed once at the end.
//
// NOTE(review): the key of additional-kv is the generic `string` rule, so as
// written this grammar also appears to accept an additional entry whose key is
// "a" again — presumably acceptable for generation; confirm.
run({
"required + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"required": ["a"],
"additionalProperties": {"type": "string"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
additional-kv ::= string ":" space string
additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
// Case: one optional numeric property (a, not listed in "required") combined
// with an "additionalProperties" sub-schema of type number.
//
// Expected grammar shape:
//   - root is an optional group offering either `a-kv a-rest` or
//     additional-kvs on its own;
//   - a-rest is defined as exactly additional-kvs.
//
// NOTE(review): as written, a-rest is neither wrapped in `( ... )?` nor
// preceded by `"," space`. Read literally, the `a-kv a-rest` alternative then
// requires at least one additional entry directly after a-kv with no comma
// between them, so an object holding only "a", or "a" followed by
// comma-separated extras, does not look derivable. Compare c-rest in the
// "required + optional props" case, which is `( "," space d-kv )?`. Confirm
// whether this expectation is intended.
//
// NOTE(review): additional-kv references `string`, but no `string ::= ...`
// rule is listed in the expected grammar below; confirm.
run({
"optional + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"additionalProperties": {"type": "number"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
a-rest ::= additional-kvs
additional-kv ::= string ":" space number
additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
space ::= " "?
)"""
});
// Case: one required property (a), one optional property (b), and an
// "additionalProperties" sub-schema, all of type number.
//
// Expected grammar shape:
//   - root emits a-kv unconditionally, then an optional comma-led group
//     offering either `b-kv b-rest` or additional-kvs;
//   - b-rest is defined as exactly additional-kvs.
//
// NOTE(review): as written, b-rest is neither optional nor preceded by
// `"," space`, so choosing the `b-kv b-rest` alternative appears to require an
// additional entry immediately after b-kv with no separating comma; an object
// holding just "a" and "b" does not look derivable. Confirm whether this
// expectation is intended.
//
// NOTE(review): additional-kv references `string`, but no `string ::= ...`
// rule is listed in the expected grammar below; confirm.
run({
"required + optional + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"},
"b": {"type": "number"}
},
"required": ["a"],
"additionalProperties": {"type": "number"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
additional-kv ::= string ":" space number
additional-kvs ::= additional-kv ( "," space additional-kv )*
b-kv ::= "\"b\"" space ":" space number
b-rest ::= additional-kvs
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
space ::= " "?
)"""
});
run({
"top-level $ref",
R"""({
@ -451,4 +516,3 @@ int main() {
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
});
}