json: deps management for primitive rules (+ allow null values)
This commit is contained in:
parent
f771a8f1b5
commit
159b883bd4
7 changed files with 1820 additions and 1669 deletions
|
@ -13,33 +13,37 @@ using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
const std::string SPACE_RULE = "\" \"?";
|
const std::string SPACE_RULE = "\" \"?";
|
||||||
|
|
||||||
std::unordered_map<std::string, std::string> PRIMITIVE_RULES = {
|
struct BuiltinRule {
|
||||||
{"boolean", "(\"true\" | \"false\") space"},
|
std::string content;
|
||||||
{"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"},
|
std::vector<std::string> deps;
|
||||||
{"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"},
|
};
|
||||||
{"value", "object | array | string | number | boolean"},
|
|
||||||
{"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
|
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||||
{"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
|
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
||||||
{"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
{"number", {"(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space", {}}},
|
||||||
|
{"integer", {"(\"-\"? ([0-9] | [1-9] [0-9]*)) space", {}}},
|
||||||
|
{"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
|
||||||
|
{"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
|
||||||
|
{"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
|
||||||
|
{"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
|
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
|
||||||
{"string", " \"\\\"\" (\n"
|
{"string", {" \"\\\"\" (\n"
|
||||||
" [^\"\\\\] |\n"
|
" [^\"\\\\] |\n"
|
||||||
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
||||||
" )* \"\\\"\" space"},
|
" )* \"\\\"\" space", {}}},
|
||||||
{"null", "\"null\" space"}
|
{"null", {"\"null\" space", {}}},
|
||||||
};
|
};
|
||||||
std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"};
|
|
||||||
|
|
||||||
std::unordered_map<std::string, std::string> DATE_RULES = {
|
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||||
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"},
|
{"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||||
{"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"},
|
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||||
{"date-time", "date \"T\" time"},
|
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
||||||
{"date-string", "\"\\\"\" date \"\\\"\" space"},
|
{"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
|
||||||
{"time-string", "\"\\\"\" time \"\\\"\" space"},
|
{"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
|
||||||
{"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
|
{"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool is_reserved_name(const std::string & name) {
|
static bool is_reserved_name(const std::string & name) {
|
||||||
|
@ -47,7 +51,7 @@ static bool is_reserved_name(const std::string & name) {
|
||||||
if (RESERVED_NAMES.empty()) {
|
if (RESERVED_NAMES.empty()) {
|
||||||
RESERVED_NAMES.insert("root");
|
RESERVED_NAMES.insert("root");
|
||||||
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
||||||
for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first);
|
for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
|
||||||
}
|
}
|
||||||
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
||||||
}
|
}
|
||||||
|
@ -424,7 +428,7 @@ private:
|
||||||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
||||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
||||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||||
prop_kv_rule_names["*"] = kv_rule;
|
prop_kv_rule_names["*"] = kv_rule;
|
||||||
optional_props.push_back("*");
|
optional_props.push_back("*");
|
||||||
}
|
}
|
||||||
|
@ -486,6 +490,25 @@ private:
|
||||||
return rule;
|
return rule;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
|
||||||
|
auto n = _add_rule(name, rule.content);
|
||||||
|
for (const auto & dep : rule.deps) {
|
||||||
|
BuiltinRule dep_rule;
|
||||||
|
auto it = PRIMITIVE_RULES.find(dep);
|
||||||
|
if (it == PRIMITIVE_RULES.end()) {
|
||||||
|
it = STRING_FORMAT_RULES.find(dep);
|
||||||
|
if (it == STRING_FORMAT_RULES.end()) {
|
||||||
|
_errors.push_back("Rule " + dep + " not known");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (_rules.find(dep) == _rules.end()) {
|
||||||
|
_add_primitive(dep, it->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SchemaConverter(
|
SchemaConverter(
|
||||||
const std::function<json(const std::string &)> & fetch_json,
|
const std::function<json(const std::string &)> & fetch_json,
|
||||||
|
@ -672,24 +695,19 @@ public:
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||||
return _visit_pattern(schema["pattern"], rule_name);
|
return _visit_pattern(schema["pattern"], rule_name);
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||||
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||||
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
|
} else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
|
||||||
for (const auto & kv : DATE_RULES) {
|
auto prim_name = schema_format + "-string";
|
||||||
_add_rule(kv.first, kv.second);
|
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
|
||||||
}
|
|
||||||
return schema_format + "-string";
|
|
||||||
} else if (schema.empty() || schema_type == "object") {
|
} else if (schema.empty() || schema_type == "object") {
|
||||||
for (const auto & n : OBJECT_RULE_NAMES) {
|
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||||
_add_rule(n, PRIMITIVE_RULES.at(n));
|
|
||||||
}
|
|
||||||
return _add_rule(rule_name, "object");
|
|
||||||
} else {
|
} else {
|
||||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,37 +6,44 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from typing import Any, Dict, List, Set, Tuple, Union
|
from typing import Any, Dict, List, Set, Tuple, Union
|
||||||
|
|
||||||
|
class BuiltinRule:
|
||||||
|
def __init__(self, content: str, deps: list[str] = None):
|
||||||
|
self.content = content
|
||||||
|
self.deps = deps or []
|
||||||
|
|
||||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||||
# whitespace. Also maybe improves generation quality?
|
# whitespace. Also maybe improves generation quality?
|
||||||
SPACE_RULE = '" "?'
|
SPACE_RULE = '" "?'
|
||||||
|
|
||||||
PRIMITIVE_RULES = {
|
PRIMITIVE_RULES = {
|
||||||
'boolean': '("true" | "false") space',
|
'boolean' : BuiltinRule('("true" | "false") space', []),
|
||||||
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
'number' : BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', []),
|
||||||
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
'integer' : BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) space', []),
|
||||||
'value' : 'object | array | string | number | boolean',
|
'value' : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||||
'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
'object' : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||||
'array' : '"[" space ( value ("," space value)* )? "]" space',
|
'array' : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||||
'uuid' : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space',
|
'uuid' : BuiltinRule('"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space', []),
|
||||||
'string': r''' "\"" (
|
'string' : BuiltinRule(r''' "\"" (
|
||||||
[^"\\] |
|
[^"\\] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
)* "\"" space''',
|
)* "\"" space''', []),
|
||||||
'null': '"null" space',
|
'null' : BuiltinRule('"null" space', []),
|
||||||
}
|
}
|
||||||
OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']
|
|
||||||
|
|
||||||
# TODO: support "uri", "email" string formats
|
# TODO: support "uri", "email" string formats
|
||||||
DATE_RULES = {
|
STRING_FORMAT_RULES = {
|
||||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
'date' : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
'time' : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||||
'date-time': 'date "T" time',
|
'date-time' : BuiltinRule('date "T" time', ['date', 'time']),
|
||||||
'date-string': '"\\"" date "\\"" space',
|
'date-string' : BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||||
'time-string': '"\\"" time "\\"" space',
|
'time-string' : BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||||
'date-time-string': '"\\"" date-time "\\"" space',
|
'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||||
}
|
}
|
||||||
|
|
||||||
RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()])
|
DOTALL = '[\\U00000000-\\U0010FFFF]'
|
||||||
|
DOT = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
||||||
|
|
||||||
|
RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()])
|
||||||
|
|
||||||
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||||
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||||
|
@ -46,8 +53,6 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']'
|
||||||
NON_LITERAL_SET = set('|.()[]{}*+?')
|
NON_LITERAL_SET = set('|.()[]{}*+?')
|
||||||
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
||||||
|
|
||||||
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
|
||||||
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
|
||||||
|
|
||||||
class SchemaConverter:
|
class SchemaConverter:
|
||||||
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
||||||
|
@ -55,7 +60,9 @@ class SchemaConverter:
|
||||||
self._allow_fetch = allow_fetch
|
self._allow_fetch = allow_fetch
|
||||||
self._dotall = dotall
|
self._dotall = dotall
|
||||||
self._raw_pattern = raw_pattern
|
self._raw_pattern = raw_pattern
|
||||||
self._rules = {'space': SPACE_RULE}
|
self._rules = {
|
||||||
|
'space': SPACE_RULE,
|
||||||
|
}
|
||||||
self._refs = {}
|
self._refs = {}
|
||||||
self._refs_being_resolved = set()
|
self._refs_being_resolved = set()
|
||||||
|
|
||||||
|
@ -65,6 +72,29 @@ class SchemaConverter:
|
||||||
)
|
)
|
||||||
return f'"{escaped}"'
|
return f'"{escaped}"'
|
||||||
|
|
||||||
|
def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str:
|
||||||
|
'''
|
||||||
|
not_literal('a') -> '[^a]'
|
||||||
|
not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)?'
|
||||||
|
'''
|
||||||
|
assert len(literal) > 0, 'Empty literal not supported'
|
||||||
|
def recurse(i: int):
|
||||||
|
c = literal[i]
|
||||||
|
if maybe_escaped_underscores and c == '_':
|
||||||
|
yield f'[^{c}\\\\]'
|
||||||
|
yield ' | '
|
||||||
|
yield f'"\\\\"? "{c}"'
|
||||||
|
else:
|
||||||
|
yield f'[^{c}]'
|
||||||
|
if i < len(literal) - 1:
|
||||||
|
yield ' | '
|
||||||
|
yield self._format_literal(c)
|
||||||
|
yield ' ('
|
||||||
|
yield from recurse(i + 1)
|
||||||
|
yield ')?'
|
||||||
|
|
||||||
|
return ''.join(('(', *recurse(0), ')'))
|
||||||
|
|
||||||
def _add_rule(self, name, rule):
|
def _add_rule(self, name, rule):
|
||||||
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
||||||
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
||||||
|
@ -169,10 +199,10 @@ class SchemaConverter:
|
||||||
|
|
||||||
def get_dot():
|
def get_dot():
|
||||||
if self._dotall:
|
if self._dotall:
|
||||||
rule = '[\\U00000000-\\U0010FFFF]'
|
rule = DOTALL
|
||||||
else:
|
else:
|
||||||
# Accept any character... except \n and \r line break chars (\x0A and \xOD)
|
# Accept any character... except \n and \r line break chars (\x0A and \xOD)
|
||||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
rule = DOT
|
||||||
return self._add_rule(f'dot', rule)
|
return self._add_rule(f'dot', rule)
|
||||||
|
|
||||||
def join_seq():
|
def join_seq():
|
||||||
|
@ -394,28 +424,32 @@ class SchemaConverter:
|
||||||
return self._visit_pattern(schema['pattern'], rule_name)
|
return self._visit_pattern(schema['pattern'], rule_name)
|
||||||
|
|
||||||
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
||||||
return self._add_rule(
|
return self._add_primitive(
|
||||||
'root' if rule_name == 'root' else schema_format,
|
'root' if rule_name == 'root' else schema_format,
|
||||||
PRIMITIVE_RULES['uuid']
|
PRIMITIVE_RULES['uuid']
|
||||||
)
|
)
|
||||||
|
|
||||||
elif schema_type in (None, 'string') and schema_format in DATE_RULES:
|
elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES:
|
||||||
for t, r in DATE_RULES.items():
|
prim_name = f'{schema_format}-string'
|
||||||
self._add_rule(t, r)
|
return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name]))
|
||||||
return schema_format + '-string'
|
|
||||||
|
|
||||||
elif (schema_type == 'object') or (len(schema) == 0):
|
elif (schema_type == 'object') or (len(schema) == 0):
|
||||||
for n in OBJECT_RULE_NAMES:
|
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||||
self._add_rule(n, PRIMITIVE_RULES[n])
|
|
||||||
return self._add_rule(rule_name, 'object')
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||||
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return self._add_rule(
|
return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type])
|
||||||
'root' if rule_name == 'root' else schema_type,
|
|
||||||
PRIMITIVE_RULES[schema_type]
|
def _add_primitive(self, name: str, rule: BuiltinRule):
|
||||||
)
|
n = self._add_rule(name, rule.content)
|
||||||
|
|
||||||
|
for dep in rule.deps:
|
||||||
|
dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep)
|
||||||
|
assert dep_rule, f'Rule {dep} not known'
|
||||||
|
if dep not in self._rules:
|
||||||
|
self._add_primitive(dep, dep_rule)
|
||||||
|
return n
|
||||||
|
|
||||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
||||||
prop_order = self._prop_order
|
prop_order = self._prop_order
|
||||||
|
@ -437,7 +471,7 @@ class SchemaConverter:
|
||||||
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
||||||
prop_kv_rule_names["*"] = self._add_rule(
|
prop_kv_rule_names["*"] = self._add_rule(
|
||||||
f'{sub_name}-kv',
|
f'{sub_name}-kv',
|
||||||
self._add_rule('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||||
)
|
)
|
||||||
optional_props.append("*")
|
optional_props.append("*")
|
||||||
|
|
||||||
|
@ -547,4 +581,4 @@ def main(args_in = None):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
0
examples/server/copy.mjs.hpp
Normal file
0
examples/server/copy.mjs.hpp
Normal file
0
examples/server/json-schema-to-grammar.hpp
Normal file
0
examples/server/json-schema-to-grammar.hpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,33 +1,39 @@
|
||||||
// WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
|
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||||
const SPACE_RULE = '" "?';
|
const SPACE_RULE = '" "?';
|
||||||
|
|
||||||
|
class BuiltinRule {
|
||||||
|
constructor(content, deps) {
|
||||||
|
this.content = content;
|
||||||
|
this.deps = deps || [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const PRIMITIVE_RULES = {
|
const PRIMITIVE_RULES = {
|
||||||
boolean: '("true" | "false") space',
|
boolean : new BuiltinRule('("true" | "false") space', []),
|
||||||
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
number : new BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', []),
|
||||||
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
integer : new BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) space', []),
|
||||||
value: 'object | array | string | number | boolean',
|
value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||||
object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||||
array: '"[" space ( value ("," space value)* )? "]" space',
|
array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||||
uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space',
|
uuid : new BuiltinRule('"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', []),
|
||||||
string: ` "\\"" (
|
string : new BuiltinRule(` "\\"" (
|
||||||
[^"\\\\] |
|
[^"\\\\] |
|
||||||
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
)* "\\"" space`,
|
)* "\\"" space`, []),
|
||||||
null: '"null" space',
|
null: new BuiltinRule('"null" space', []),
|
||||||
};
|
};
|
||||||
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'];
|
|
||||||
|
|
||||||
// TODO: support "uri", "email" string formats
|
// TODO: support "uri", "email" string formats
|
||||||
const DATE_RULES = {
|
const STRING_FORMAT_RULES = {
|
||||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
'date' : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||||
'date-time': 'date "T" time',
|
'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
|
||||||
'date-string': '"\\"" date "\\"" space',
|
'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||||
'time-string': '"\\"" time "\\"" space',
|
'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||||
'date-time-string': '"\\"" date-time "\\"" space',
|
'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||||
};
|
}
|
||||||
|
|
||||||
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES};
|
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
|
||||||
|
|
||||||
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
||||||
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
||||||
|
@ -415,28 +421,38 @@ export class SchemaConverter {
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
||||||
return this._visitPattern(schema.pattern, ruleName);
|
return this._visitPattern(schema.pattern, ruleName);
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
||||||
return this._addRule(
|
return this._addPrimitive(
|
||||||
ruleName === 'root' ? 'root' : schemaFormat,
|
ruleName === 'root' ? 'root' : schemaFormat,
|
||||||
PRIMITIVE_RULES['uuid'])
|
PRIMITIVE_RULES['uuid']
|
||||||
} else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
|
);
|
||||||
for (const [t, r] of Object.entries(DATE_RULES)) {
|
} else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
|
||||||
this._addRule(t, r);
|
const primName = `${schema.format}-string`
|
||||||
}
|
return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
|
||||||
return schemaFormat + '-string';
|
|
||||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||||
for (const n of OBJECT_RULE_NAMES) {
|
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||||
this._addRule(n, PRIMITIVE_RULES[n]);
|
|
||||||
}
|
|
||||||
return this._addRule(ruleName, 'object');
|
|
||||||
} else {
|
} else {
|
||||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||||
}
|
}
|
||||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_addPrimitive(name, rule) {
|
||||||
|
let n = this._addRule(name, rule.content);
|
||||||
|
for (const dep of rule.deps) {
|
||||||
|
const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
|
||||||
|
if (!depRule) {
|
||||||
|
throw new Error(`Rule ${dep} not known`);
|
||||||
|
}
|
||||||
|
if (!(dep in this._rules)) {
|
||||||
|
this._addPrimitive(dep, depRule);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
_buildObjectRule(properties, required, name, additionalProperties) {
|
_buildObjectRule(properties, required, name, additionalProperties) {
|
||||||
const propOrder = this._propOrder;
|
const propOrder = this._propOrder;
|
||||||
// sort by position in prop_order (if specified) then by original order
|
// sort by position in prop_order (if specified) then by original order
|
||||||
|
@ -462,7 +478,7 @@ export class SchemaConverter {
|
||||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||||
propKvRuleNames['*'] = this._addRule(
|
propKvRuleNames['*'] = this._addRule(
|
||||||
`${subName}-kv`,
|
`${subName}-kv`,
|
||||||
`${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||||
optionalProps.push('*');
|
optionalProps.push('*');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -113,7 +113,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
[^"\\] |
|
[^"\\] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
)* "\"" space
|
)* "\"" space
|
||||||
value ::= object | array | string | number | boolean
|
value ::= object | array | string | number | boolean | null
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -133,10 +133,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
date-string ::= "\"" date "\"" space
|
date-string ::= "\"" date "\"" space
|
||||||
date-time ::= date "T" time
|
date-time ::= date "T" time
|
||||||
date-time-string ::= "\"" date-time "\"" space
|
date-time-string ::= "\"" date-time "\"" space
|
||||||
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
|
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
||||||
time-string ::= "\"" time "\"" space
|
time-string ::= "\"" time "\"" space
|
||||||
|
tuple-0 ::= date-string
|
||||||
|
tuple-2 ::= time-string
|
||||||
|
tuple-3 ::= date-time-string
|
||||||
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
@ -529,7 +532,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
[^"\\] |
|
[^"\\] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
)* "\"" space
|
)* "\"" space
|
||||||
value ::= object | array | string | number | boolean
|
value ::= object | array | string | number | boolean | null
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -551,7 +554,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
[^"\\] |
|
[^"\\] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
)* "\"" space
|
)* "\"" space
|
||||||
value ::= object | array | string | number | boolean
|
value ::= object | array | string | number | boolean | null
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue