json: deps management for primitive rules (+ allow null values)
This commit is contained in:
parent
f771a8f1b5
commit
159b883bd4
7 changed files with 1820 additions and 1669 deletions
|
@ -13,33 +13,37 @@ using json = nlohmann::ordered_json;
|
|||
|
||||
const std::string SPACE_RULE = "\" \"?";
|
||||
|
||||
std::unordered_map<std::string, std::string> PRIMITIVE_RULES = {
|
||||
{"boolean", "(\"true\" | \"false\") space"},
|
||||
{"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"},
|
||||
{"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"},
|
||||
{"value", "object | array | string | number | boolean"},
|
||||
{"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
|
||||
{"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
|
||||
{"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
struct BuiltinRule {
|
||||
std::string content;
|
||||
std::vector<std::string> deps;
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
||||
{"number", {"(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space", {}}},
|
||||
{"integer", {"(\"-\"? ([0-9] | [1-9] [0-9]*)) space", {}}},
|
||||
{"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
|
||||
{"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
|
||||
{"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
|
||||
{"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
|
||||
{"string", " \"\\\"\" (\n"
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
|
||||
{"string", {" \"\\\"\" (\n"
|
||||
" [^\"\\\\] |\n"
|
||||
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
||||
" )* \"\\\"\" space"},
|
||||
{"null", "\"null\" space"}
|
||||
" )* \"\\\"\" space", {}}},
|
||||
{"null", {"\"null\" space", {}}},
|
||||
};
|
||||
std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"};
|
||||
|
||||
std::unordered_map<std::string, std::string> DATE_RULES = {
|
||||
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"},
|
||||
{"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"},
|
||||
{"date-time", "date \"T\" time"},
|
||||
{"date-string", "\"\\\"\" date \"\\\"\" space"},
|
||||
{"time-string", "\"\\\"\" time \"\\\"\" space"},
|
||||
{"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
|
||||
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||
{"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
||||
{"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
|
||||
{"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
|
||||
{"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
|
||||
};
|
||||
|
||||
static bool is_reserved_name(const std::string & name) {
|
||||
|
@ -47,7 +51,7 @@ static bool is_reserved_name(const std::string & name) {
|
|||
if (RESERVED_NAMES.empty()) {
|
||||
RESERVED_NAMES.insert("root");
|
||||
for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
|
||||
for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first);
|
||||
for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
|
||||
}
|
||||
return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
|
||||
}
|
||||
|
@ -424,7 +428,7 @@ private:
|
|||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
||||
prop_kv_rule_names["*"] = kv_rule;
|
||||
optional_props.push_back("*");
|
||||
}
|
||||
|
@ -486,6 +490,25 @@ private:
|
|||
return rule;
|
||||
}
|
||||
|
||||
std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
|
||||
auto n = _add_rule(name, rule.content);
|
||||
for (const auto & dep : rule.deps) {
|
||||
BuiltinRule dep_rule;
|
||||
auto it = PRIMITIVE_RULES.find(dep);
|
||||
if (it == PRIMITIVE_RULES.end()) {
|
||||
it = STRING_FORMAT_RULES.find(dep);
|
||||
if (it == STRING_FORMAT_RULES.end()) {
|
||||
_errors.push_back("Rule " + dep + " not known");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (_rules.find(dep) == _rules.end()) {
|
||||
_add_primitive(dep, it->second);
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
public:
|
||||
SchemaConverter(
|
||||
const std::function<json(const std::string &)> & fetch_json,
|
||||
|
@ -672,24 +695,19 @@ public:
|
|||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||
return _visit_pattern(schema["pattern"], rule_name);
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
|
||||
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
|
||||
for (const auto & kv : DATE_RULES) {
|
||||
_add_rule(kv.first, kv.second);
|
||||
}
|
||||
return schema_format + "-string";
|
||||
return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
|
||||
auto prim_name = schema_format + "-string";
|
||||
return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
|
||||
} else if (schema.empty() || schema_type == "object") {
|
||||
for (const auto & n : OBJECT_RULE_NAMES) {
|
||||
_add_rule(n, PRIMITIVE_RULES.at(n));
|
||||
}
|
||||
return _add_rule(rule_name, "object");
|
||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||
} else {
|
||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||
return "";
|
||||
}
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||
return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,37 +6,44 @@ import re
|
|||
import sys
|
||||
from typing import Any, Dict, List, Set, Tuple, Union
|
||||
|
||||
class BuiltinRule:
|
||||
def __init__(self, content: str, deps: list[str] = None):
|
||||
self.content = content
|
||||
self.deps = deps or []
|
||||
|
||||
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||
# whitespace. Also maybe improves generation quality?
|
||||
SPACE_RULE = '" "?'
|
||||
|
||||
PRIMITIVE_RULES = {
|
||||
'boolean': '("true" | "false") space',
|
||||
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||
'value' : 'object | array | string | number | boolean',
|
||||
'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
||||
'array' : '"[" space ( value ("," space value)* )? "]" space',
|
||||
'uuid' : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space',
|
||||
'string': r''' "\"" (
|
||||
'boolean' : BuiltinRule('("true" | "false") space', []),
|
||||
'number' : BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', []),
|
||||
'integer' : BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) space', []),
|
||||
'value' : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||
'object' : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||
'array' : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||
'uuid' : BuiltinRule('"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space', []),
|
||||
'string' : BuiltinRule(r''' "\"" (
|
||||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space''',
|
||||
'null': '"null" space',
|
||||
)* "\"" space''', []),
|
||||
'null' : BuiltinRule('"null" space', []),
|
||||
}
|
||||
OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']
|
||||
|
||||
# TODO: support "uri", "email" string formats
|
||||
DATE_RULES = {
|
||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
||||
'date-time': 'date "T" time',
|
||||
'date-string': '"\\"" date "\\"" space',
|
||||
'time-string': '"\\"" time "\\"" space',
|
||||
'date-time-string': '"\\"" date-time "\\"" space',
|
||||
STRING_FORMAT_RULES = {
|
||||
'date' : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||
'time' : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||
'date-time' : BuiltinRule('date "T" time', ['date', 'time']),
|
||||
'date-string' : BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||
'time-string' : BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||
'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||
}
|
||||
|
||||
RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()])
|
||||
DOTALL = '[\\U00000000-\\U0010FFFF]'
|
||||
DOT = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
||||
|
||||
RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()])
|
||||
|
||||
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||
|
@ -46,8 +53,6 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']'
|
|||
NON_LITERAL_SET = set('|.()[]{}*+?')
|
||||
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
||||
|
||||
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
||||
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
||||
|
||||
class SchemaConverter:
|
||||
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
||||
|
@ -55,7 +60,9 @@ class SchemaConverter:
|
|||
self._allow_fetch = allow_fetch
|
||||
self._dotall = dotall
|
||||
self._raw_pattern = raw_pattern
|
||||
self._rules = {'space': SPACE_RULE}
|
||||
self._rules = {
|
||||
'space': SPACE_RULE,
|
||||
}
|
||||
self._refs = {}
|
||||
self._refs_being_resolved = set()
|
||||
|
||||
|
@ -65,6 +72,29 @@ class SchemaConverter:
|
|||
)
|
||||
return f'"{escaped}"'
|
||||
|
||||
def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str:
|
||||
'''
|
||||
not_literal('a') -> '[^a]'
|
||||
not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)?'
|
||||
'''
|
||||
assert len(literal) > 0, 'Empty literal not supported'
|
||||
def recurse(i: int):
|
||||
c = literal[i]
|
||||
if maybe_escaped_underscores and c == '_':
|
||||
yield f'[^{c}\\\\]'
|
||||
yield ' | '
|
||||
yield f'"\\\\"? "{c}"'
|
||||
else:
|
||||
yield f'[^{c}]'
|
||||
if i < len(literal) - 1:
|
||||
yield ' | '
|
||||
yield self._format_literal(c)
|
||||
yield ' ('
|
||||
yield from recurse(i + 1)
|
||||
yield ')?'
|
||||
|
||||
return ''.join(('(', *recurse(0), ')'))
|
||||
|
||||
def _add_rule(self, name, rule):
|
||||
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
||||
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
||||
|
@ -169,10 +199,10 @@ class SchemaConverter:
|
|||
|
||||
def get_dot():
|
||||
if self._dotall:
|
||||
rule = '[\\U00000000-\\U0010FFFF]'
|
||||
rule = DOTALL
|
||||
else:
|
||||
# Accept any character... except \n and \r line break chars (\x0A and \xOD)
|
||||
rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'
|
||||
rule = DOT
|
||||
return self._add_rule(f'dot', rule)
|
||||
|
||||
def join_seq():
|
||||
|
@ -394,28 +424,32 @@ class SchemaConverter:
|
|||
return self._visit_pattern(schema['pattern'], rule_name)
|
||||
|
||||
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
|
||||
return self._add_rule(
|
||||
return self._add_primitive(
|
||||
'root' if rule_name == 'root' else schema_format,
|
||||
PRIMITIVE_RULES['uuid']
|
||||
)
|
||||
|
||||
elif schema_type in (None, 'string') and schema_format in DATE_RULES:
|
||||
for t, r in DATE_RULES.items():
|
||||
self._add_rule(t, r)
|
||||
return schema_format + '-string'
|
||||
elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES:
|
||||
prim_name = f'{schema_format}-string'
|
||||
return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name]))
|
||||
|
||||
elif (schema_type == 'object') or (len(schema) == 0):
|
||||
for n in OBJECT_RULE_NAMES:
|
||||
self._add_rule(n, PRIMITIVE_RULES[n])
|
||||
return self._add_rule(rule_name, 'object')
|
||||
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||
|
||||
else:
|
||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return self._add_rule(
|
||||
'root' if rule_name == 'root' else schema_type,
|
||||
PRIMITIVE_RULES[schema_type]
|
||||
)
|
||||
return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type])
|
||||
|
||||
def _add_primitive(self, name: str, rule: BuiltinRule):
|
||||
n = self._add_rule(name, rule.content)
|
||||
|
||||
for dep in rule.deps:
|
||||
dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep)
|
||||
assert dep_rule, f'Rule {dep} not known'
|
||||
if dep not in self._rules:
|
||||
self._add_primitive(dep, dep_rule)
|
||||
return n
|
||||
|
||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
||||
prop_order = self._prop_order
|
||||
|
@ -437,7 +471,7 @@ class SchemaConverter:
|
|||
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
||||
prop_kv_rule_names["*"] = self._add_rule(
|
||||
f'{sub_name}-kv',
|
||||
self._add_rule('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||
self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||
)
|
||||
optional_props.append("*")
|
||||
|
||||
|
@ -547,4 +581,4 @@ def main(args_in = None):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
main()
|
||||
|
|
0
examples/server/copy.mjs.hpp
Normal file
0
examples/server/copy.mjs.hpp
Normal file
0
examples/server/json-schema-to-grammar.hpp
Normal file
0
examples/server/json-schema-to-grammar.hpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,33 +1,39 @@
|
|||
// WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
|
||||
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||
const SPACE_RULE = '" "?';
|
||||
|
||||
class BuiltinRule {
|
||||
constructor(content, deps) {
|
||||
this.content = content;
|
||||
this.deps = deps || [];
|
||||
}
|
||||
}
|
||||
|
||||
const PRIMITIVE_RULES = {
|
||||
boolean: '("true" | "false") space',
|
||||
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||
value: 'object | array | string | number | boolean',
|
||||
object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
|
||||
array: '"[" space ( value ("," space value)* )? "]" space',
|
||||
uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space',
|
||||
string: ` "\\"" (
|
||||
boolean : new BuiltinRule('("true" | "false") space', []),
|
||||
number : new BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', []),
|
||||
integer : new BuiltinRule('("-"? ([0-9] | [1-9] [0-9]*)) space', []),
|
||||
value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
|
||||
object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
|
||||
array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
|
||||
uuid : new BuiltinRule('"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', []),
|
||||
string : new BuiltinRule(` "\\"" (
|
||||
[^"\\\\] |
|
||||
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\\"" space`,
|
||||
null: '"null" space',
|
||||
)* "\\"" space`, []),
|
||||
null: new BuiltinRule('"null" space', []),
|
||||
};
|
||||
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'];
|
||||
|
||||
// TODO: support "uri", "email" string formats
|
||||
const DATE_RULES = {
|
||||
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
|
||||
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
|
||||
'date-time': 'date "T" time',
|
||||
'date-string': '"\\"" date "\\"" space',
|
||||
'time-string': '"\\"" time "\\"" space',
|
||||
'date-time-string': '"\\"" date-time "\\"" space',
|
||||
};
|
||||
const STRING_FORMAT_RULES = {
|
||||
'date' : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||
'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||
'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
|
||||
'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||
'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||
'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
|
||||
}
|
||||
|
||||
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES};
|
||||
const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};
|
||||
|
||||
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
|
||||
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
|
||||
|
@ -415,28 +421,38 @@ export class SchemaConverter {
|
|||
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
|
||||
return this._visitPattern(schema.pattern, ruleName);
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
|
||||
return this._addRule(
|
||||
ruleName === 'root' ? 'root' : schemaFormat,
|
||||
PRIMITIVE_RULES['uuid'])
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
|
||||
for (const [t, r] of Object.entries(DATE_RULES)) {
|
||||
this._addRule(t, r);
|
||||
}
|
||||
return schemaFormat + '-string';
|
||||
return this._addPrimitive(
|
||||
ruleName === 'root' ? 'root' : schemaFormat,
|
||||
PRIMITIVE_RULES['uuid']
|
||||
);
|
||||
} else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
|
||||
const primName = `${schema.format}-string`
|
||||
return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
|
||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||
for (const n of OBJECT_RULE_NAMES) {
|
||||
this._addRule(n, PRIMITIVE_RULES[n]);
|
||||
}
|
||||
return this._addRule(ruleName, 'object');
|
||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||
} else {
|
||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||
}
|
||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||
return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
||||
return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
|
||||
}
|
||||
}
|
||||
|
||||
_addPrimitive(name, rule) {
|
||||
let n = this._addRule(name, rule.content);
|
||||
for (const dep of rule.deps) {
|
||||
const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
|
||||
if (!depRule) {
|
||||
throw new Error(`Rule ${dep} not known`);
|
||||
}
|
||||
if (!(dep in this._rules)) {
|
||||
this._addPrimitive(dep, depRule);
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
_buildObjectRule(properties, required, name, additionalProperties) {
|
||||
const propOrder = this._propOrder;
|
||||
// sort by position in prop_order (if specified) then by original order
|
||||
|
@ -462,7 +478,7 @@ export class SchemaConverter {
|
|||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||
propKvRuleNames['*'] = this._addRule(
|
||||
`${subName}-kv`,
|
||||
`${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||
optionalProps.push('*');
|
||||
}
|
||||
|
||||
|
|
|
@ -113,7 +113,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
});
|
||||
|
||||
|
@ -133,10 +133,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
date-string ::= "\"" date "\"" space
|
||||
date-time ::= date "T" time
|
||||
date-time-string ::= "\"" date-time "\"" space
|
||||
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
|
||||
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
|
||||
space ::= " "?
|
||||
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
|
||||
time-string ::= "\"" time "\"" space
|
||||
tuple-0 ::= date-string
|
||||
tuple-2 ::= time-string
|
||||
tuple-3 ::= date-time-string
|
||||
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
|
||||
)"""
|
||||
});
|
||||
|
@ -529,7 +532,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
});
|
||||
|
||||
|
@ -551,7 +554,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
[^"\\] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||
)* "\"" space
|
||||
value ::= object | array | string | number | boolean
|
||||
value ::= object | array | string | number | boolean | null
|
||||
)"""
|
||||
});
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue