json: fix nested $refs & allow mix of properties & anyOf (https://github.com/ggerganov/llama.cpp/issues/8073)
This commit is contained in:
parent
cb0b06a8a6
commit
d63c953185
12 changed files with 427 additions and 374 deletions
|
@ -8,6 +8,7 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
using json = nlohmann::ordered_json;
|
using json = nlohmann::ordered_json;
|
||||||
|
|
||||||
|
@ -392,10 +393,10 @@ private:
|
||||||
std::function<json(const std::string &)> _fetch_json;
|
std::function<json(const std::string &)> _fetch_json;
|
||||||
bool _dotall;
|
bool _dotall;
|
||||||
std::map<std::string, std::string> _rules;
|
std::map<std::string, std::string> _rules;
|
||||||
std::unordered_map<std::string, json> _refs;
|
|
||||||
std::unordered_set<std::string> _refs_being_resolved;
|
|
||||||
std::vector<std::string> _errors;
|
std::vector<std::string> _errors;
|
||||||
std::vector<std::string> _warnings;
|
std::vector<std::string> _warnings;
|
||||||
|
std::unordered_map<std::string, json> _external_refs;
|
||||||
|
std::vector<json> _ref_context;
|
||||||
|
|
||||||
std::string _add_rule(const std::string & name, const std::string & rule) {
|
std::string _add_rule(const std::string & name, const std::string & rule) {
|
||||||
std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
|
std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
|
||||||
|
@ -683,17 +684,6 @@ private:
|
||||||
return out.str();
|
return out.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string _resolve_ref(const std::string & ref) {
|
|
||||||
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
|
||||||
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
|
||||||
_refs_being_resolved.insert(ref);
|
|
||||||
json resolved = _refs[ref];
|
|
||||||
ref_name = visit(resolved, ref_name);
|
|
||||||
_refs_being_resolved.erase(ref);
|
|
||||||
}
|
|
||||||
return ref_name;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string _build_object_rule(
|
std::string _build_object_rule(
|
||||||
const std::vector<std::pair<std::string, json>> & properties,
|
const std::vector<std::pair<std::string, json>> & properties,
|
||||||
const std::unordered_set<std::string> & required,
|
const std::unordered_set<std::string> & required,
|
||||||
|
@ -815,78 +805,79 @@ public:
|
||||||
_rules["space"] = SPACE_RULE;
|
_rules["space"] = SPACE_RULE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void resolve_refs(json & schema, const std::string & url) {
|
|
||||||
/*
|
|
||||||
* Resolves all $ref fields in the given schema, fetching any remote schemas,
|
|
||||||
* replacing each $ref with absolute reference URL and populates _refs with the
|
|
||||||
* respective referenced (sub)schema dictionaries.
|
|
||||||
*/
|
|
||||||
std::function<void(json &)> visit_refs = [&](json & n) {
|
|
||||||
if (n.is_array()) {
|
|
||||||
for (auto & x : n) {
|
|
||||||
visit_refs(x);
|
|
||||||
}
|
|
||||||
} else if (n.is_object()) {
|
|
||||||
if (n.contains("$ref")) {
|
|
||||||
std::string ref = n["$ref"];
|
|
||||||
if (_refs.find(ref) == _refs.end()) {
|
|
||||||
json target;
|
|
||||||
if (ref.find("https://") == 0) {
|
|
||||||
std::string base_url = ref.substr(0, ref.find('#'));
|
|
||||||
auto it = _refs.find(base_url);
|
|
||||||
if (it != _refs.end()) {
|
|
||||||
target = it->second;
|
|
||||||
} else {
|
|
||||||
// Fetch the referenced schema and resolve its refs
|
|
||||||
auto referenced = _fetch_json(ref);
|
|
||||||
resolve_refs(referenced, base_url);
|
|
||||||
_refs[base_url] = referenced;
|
|
||||||
}
|
|
||||||
if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else if (ref.find("#/") == 0) {
|
|
||||||
target = schema;
|
|
||||||
n["$ref"] = url + ref;
|
|
||||||
ref = url + ref;
|
|
||||||
} else {
|
|
||||||
_errors.push_back("Unsupported ref: " + ref);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::string pointer = ref.substr(ref.find('#') + 1);
|
|
||||||
std::vector<std::string> tokens = split(pointer, "/");
|
|
||||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
|
||||||
std::string sel = tokens[i];
|
|
||||||
if (target.is_null() || !target.contains(sel)) {
|
|
||||||
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
target = target[sel];
|
|
||||||
}
|
|
||||||
_refs[ref] = target;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (auto & kv : n.items()) {
|
|
||||||
visit_refs(kv.value());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
visit_refs(schema);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string _generate_constant_rule(const json & value) {
|
std::string _generate_constant_rule(const json & value) {
|
||||||
return format_literal(value.dump());
|
return format_literal(value.dump());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ResolvedRef {
|
||||||
|
json target;
|
||||||
|
std::string name;
|
||||||
|
bool is_local;
|
||||||
|
};
|
||||||
|
|
||||||
|
ResolvedRef _resolve_ref(const std::string & ref) {
|
||||||
|
auto parts = split(ref, "#");
|
||||||
|
if (parts.size() != 2) {
|
||||||
|
_errors.push_back("Unsupported ref: " + ref);
|
||||||
|
return {json(), "", false};
|
||||||
|
}
|
||||||
|
const auto & url = parts[0];
|
||||||
|
json target;
|
||||||
|
bool is_local = url.empty();
|
||||||
|
if (is_local) {
|
||||||
|
if (_ref_context.empty()) {
|
||||||
|
_errors.push_back("Error resolving ref " + ref + ": no context");
|
||||||
|
return {json(), "", false};
|
||||||
|
}
|
||||||
|
target = _ref_context.back();
|
||||||
|
} else {
|
||||||
|
auto it = _external_refs.find(url);
|
||||||
|
if (it != _external_refs.end()) {
|
||||||
|
target = it->second;
|
||||||
|
} else {
|
||||||
|
// Fetch the referenced schema and resolve its refs
|
||||||
|
target = _fetch_json(url);
|
||||||
|
_external_refs[url] = target;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto tokens = split(parts[1], "/");
|
||||||
|
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||||
|
const auto & sel = tokens[i];
|
||||||
|
if (target.is_null() || !target.contains(sel)) {
|
||||||
|
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
||||||
|
return {json(), "", false};
|
||||||
|
}
|
||||||
|
target = target[sel];
|
||||||
|
}
|
||||||
|
return {target, tokens.empty() ? "" : tokens[tokens.size() - 1], is_local};
|
||||||
|
}
|
||||||
|
|
||||||
std::string visit(const json & schema, const std::string & name) {
|
std::string visit(const json & schema, const std::string & name) {
|
||||||
json schema_type = schema.contains("type") ? schema["type"] : json();
|
json schema_type = schema.contains("type") ? schema["type"] : json();
|
||||||
std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : "";
|
std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : "";
|
||||||
std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name;
|
std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name;
|
||||||
|
|
||||||
if (schema.contains("$ref")) {
|
if (_ref_context.empty()) {
|
||||||
return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
|
_ref_context.push_back(schema);
|
||||||
|
auto ret = visit(schema, name);
|
||||||
|
_ref_context.pop_back();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (schema.contains("$ref") && schema["$ref"].is_string()) {
|
||||||
|
const auto & ref = schema["$ref"].get<std::string>();
|
||||||
|
auto resolved = _resolve_ref(ref);
|
||||||
|
if (resolved.target.is_null()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
if (!resolved.is_local) {
|
||||||
|
_ref_context.push_back(resolved.target);
|
||||||
|
}
|
||||||
|
auto ret = visit(resolved.target, (name.empty() || resolved.name.empty()) ? name : resolved.name);
|
||||||
|
if (!resolved.is_local) {
|
||||||
|
_ref_context.pop_back();
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
} else if (schema.contains("oneOf") || schema.contains("anyOf")) {
|
||||||
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
|
std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
|
||||||
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
|
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
|
||||||
|
@ -906,55 +897,6 @@ public:
|
||||||
enum_values.push_back(_generate_constant_rule(v));
|
enum_values.push_back(_generate_constant_rule(v));
|
||||||
}
|
}
|
||||||
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
||||||
} else if ((schema_type.is_null() || schema_type == "object")
|
|
||||||
&& (schema.contains("properties") ||
|
|
||||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
|
||||||
std::unordered_set<std::string> required;
|
|
||||||
if (schema.contains("required") && schema["required"].is_array()) {
|
|
||||||
for (const auto & item : schema["required"]) {
|
|
||||||
if (item.is_string()) {
|
|
||||||
required.insert(item.get<std::string>());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::vector<std::pair<std::string, json>> properties;
|
|
||||||
if (schema.contains("properties")) {
|
|
||||||
for (const auto & prop : schema["properties"].items()) {
|
|
||||||
properties.emplace_back(prop.key(), prop.value());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return _add_rule(rule_name,
|
|
||||||
_build_object_rule(
|
|
||||||
properties, required, name,
|
|
||||||
schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
|
|
||||||
} else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) {
|
|
||||||
std::unordered_set<std::string> required;
|
|
||||||
std::vector<std::pair<std::string, json>> properties;
|
|
||||||
std::string hybrid_name = name;
|
|
||||||
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
|
|
||||||
if (comp_schema.contains("$ref")) {
|
|
||||||
add_component(_refs[comp_schema["$ref"]], is_required);
|
|
||||||
} else if (comp_schema.contains("properties")) {
|
|
||||||
for (const auto & prop : comp_schema["properties"].items()) {
|
|
||||||
properties.emplace_back(prop.key(), prop.value());
|
|
||||||
if (is_required) {
|
|
||||||
required.insert(prop.key());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// todo warning
|
|
||||||
}
|
|
||||||
};
|
|
||||||
for (auto & t : schema["allOf"]) {
|
|
||||||
if (t.contains("anyOf")) {
|
|
||||||
for (auto & tt : t["anyOf"]) {
|
|
||||||
add_component(tt, false);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
add_component(t, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
|
|
||||||
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
|
} else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
|
||||||
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
|
json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
|
||||||
if (items.is_array()) {
|
if (items.is_array()) {
|
||||||
|
@ -1005,8 +947,71 @@ public:
|
||||||
_build_min_max_int(min_value, max_value, out);
|
_build_min_max_int(min_value, max_value, out);
|
||||||
out << ") space";
|
out << ") space";
|
||||||
return _add_rule(rule_name, out.str());
|
return _add_rule(rule_name, out.str());
|
||||||
} else if (schema.empty() || schema_type == "object") {
|
} else if ((schema_type.is_null() || schema_type == "object")) {
|
||||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
std::unordered_set<std::string> required;
|
||||||
|
std::vector<std::pair<std::string, json>> properties;
|
||||||
|
auto is_explicit_object = schema_type == "object";
|
||||||
|
json additional_properties;
|
||||||
|
if (schema.contains("additionalProperties")) {
|
||||||
|
is_explicit_object = true;
|
||||||
|
additional_properties = schema["additionalProperties"];
|
||||||
|
}
|
||||||
|
if (schema.contains("properties") && schema["properties"].is_object()) {
|
||||||
|
is_explicit_object = true;
|
||||||
|
for (const auto & prop : schema["properties"].items()) {
|
||||||
|
if (prop.value().is_object()) {
|
||||||
|
properties.emplace_back(prop.key(), prop.value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (schema.contains("required") && schema["required"].is_array()) {
|
||||||
|
for (const auto & item : schema["required"]) {
|
||||||
|
if (item.is_string()) {
|
||||||
|
required.insert(item.get<std::string>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (schema.contains("allOf") && schema["allOf"].is_array()) {
|
||||||
|
std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
|
||||||
|
if (comp_schema.contains("$ref") && comp_schema["$ref"].is_string()) {
|
||||||
|
auto resolved = _resolve_ref(comp_schema["$ref"].get<std::string>());
|
||||||
|
add_component(resolved.target, is_required);
|
||||||
|
} else if (comp_schema.contains("properties")) {
|
||||||
|
for (const auto & prop : comp_schema["properties"].items()) {
|
||||||
|
properties.emplace_back(prop.key(), prop.value());
|
||||||
|
if (is_required) {
|
||||||
|
required.insert(prop.key());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (comp_schema.contains("additionalProperties")) {
|
||||||
|
if (additional_properties.is_null()) {
|
||||||
|
additional_properties = comp_schema["additionalProperties"];
|
||||||
|
} else if (additional_properties != comp_schema["additionalProperties"]) {
|
||||||
|
_warnings.push_back("Inconsistent additionalProperties in allOf");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// todo warning
|
||||||
|
}
|
||||||
|
};
|
||||||
|
for (auto & t : schema["allOf"]) {
|
||||||
|
if (t.contains("anyOf")) {
|
||||||
|
for (auto & tt : t["anyOf"]) {
|
||||||
|
add_component(tt, false);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
add_component(t, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (properties.empty() && (additional_properties == true || additional_properties.is_null())) {
|
||||||
|
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||||
|
}
|
||||||
|
auto default_additional_properties = is_explicit_object ? json() : json(false);
|
||||||
|
return _add_rule(rule_name,
|
||||||
|
_build_object_rule(
|
||||||
|
properties, required, name,
|
||||||
|
additional_properties.is_null() ? default_additional_properties : additional_properties));
|
||||||
} else {
|
} else {
|
||||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
|
||||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||||
|
@ -1038,7 +1043,6 @@ public:
|
||||||
std::string json_schema_to_grammar(const json & schema) {
|
std::string json_schema_to_grammar(const json & schema) {
|
||||||
SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
|
SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
|
||||||
auto copy = schema;
|
auto copy = schema;
|
||||||
converter.resolve_refs(copy, "input");
|
|
||||||
converter.visit(copy, "");
|
converter.visit(copy, "");
|
||||||
converter.check_errors();
|
converter.check_errors();
|
||||||
return converter.format_grammar();
|
return converter.format_grammar();
|
||||||
|
|
|
@ -243,8 +243,8 @@ class SchemaConverter:
|
||||||
self._rules = {
|
self._rules = {
|
||||||
'space': SPACE_RULE,
|
'space': SPACE_RULE,
|
||||||
}
|
}
|
||||||
self._refs = {}
|
self._external_refs = {}
|
||||||
self._refs_being_resolved = set()
|
self._ref_context = []
|
||||||
|
|
||||||
def _format_literal(self, literal):
|
def _format_literal(self, literal):
|
||||||
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
|
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
|
||||||
|
@ -332,51 +332,6 @@ class SchemaConverter:
|
||||||
self._rules[key] = rule
|
self._rules[key] = rule
|
||||||
return key
|
return key
|
||||||
|
|
||||||
def resolve_refs(self, schema: dict, url: str):
|
|
||||||
'''
|
|
||||||
Resolves all $ref fields in the given schema, fetching any remote schemas,
|
|
||||||
replacing $ref with absolute reference URL and populating self._refs with the
|
|
||||||
respective referenced (sub)schema dictionaries.
|
|
||||||
'''
|
|
||||||
def visit(n: dict):
|
|
||||||
if isinstance(n, list):
|
|
||||||
return [visit(x) for x in n]
|
|
||||||
elif isinstance(n, dict):
|
|
||||||
ref = n.get('$ref')
|
|
||||||
if ref is not None and ref not in self._refs:
|
|
||||||
if ref.startswith('https://'):
|
|
||||||
assert self._allow_fetch, 'Fetching remote schemas is not allowed (use --allow-fetch for force)'
|
|
||||||
import requests
|
|
||||||
|
|
||||||
frag_split = ref.split('#')
|
|
||||||
base_url = frag_split[0]
|
|
||||||
|
|
||||||
target = self._refs.get(base_url)
|
|
||||||
if target is None:
|
|
||||||
target = self.resolve_refs(requests.get(ref).json(), base_url)
|
|
||||||
self._refs[base_url] = target
|
|
||||||
|
|
||||||
if len(frag_split) == 1 or frag_split[-1] == '':
|
|
||||||
return target
|
|
||||||
elif ref.startswith('#/'):
|
|
||||||
target = schema
|
|
||||||
ref = f'{url}{ref}'
|
|
||||||
n['$ref'] = ref
|
|
||||||
else:
|
|
||||||
raise ValueError(f'Unsupported ref {ref}')
|
|
||||||
|
|
||||||
for sel in ref.split('#')[-1].split('/')[1:]:
|
|
||||||
assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
|
|
||||||
target = target[sel]
|
|
||||||
|
|
||||||
self._refs[ref] = target
|
|
||||||
else:
|
|
||||||
for v in n.values():
|
|
||||||
visit(v)
|
|
||||||
|
|
||||||
return n
|
|
||||||
return visit(schema)
|
|
||||||
|
|
||||||
def _generate_union_rule(self, name, alt_schemas):
|
def _generate_union_rule(self, name, alt_schemas):
|
||||||
return ' | '.join((
|
return ' | '.join((
|
||||||
self.visit(alt_schema, f'{name}{"-" if name else "alternative-"}{i}')
|
self.visit(alt_schema, f'{name}{"-" if name else "alternative-"}{i}')
|
||||||
|
@ -541,25 +496,59 @@ class SchemaConverter:
|
||||||
else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space")
|
else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space")
|
||||||
|
|
||||||
|
|
||||||
def _resolve_ref(self, ref):
|
|
||||||
ref_name = ref.split('/')[-1]
|
|
||||||
if ref_name not in self._rules and ref not in self._refs_being_resolved:
|
|
||||||
self._refs_being_resolved.add(ref)
|
|
||||||
resolved = self._refs[ref]
|
|
||||||
ref_name = self.visit(resolved, ref_name)
|
|
||||||
self._refs_being_resolved.remove(ref)
|
|
||||||
return ref_name
|
|
||||||
|
|
||||||
def _generate_constant_rule(self, value):
|
def _generate_constant_rule(self, value):
|
||||||
return self._format_literal(json.dumps(value))
|
return self._format_literal(json.dumps(value))
|
||||||
|
|
||||||
|
class ResolvedRef:
|
||||||
|
def __init__(self, target: Any, name: str, is_local: bool):
|
||||||
|
self.target = target
|
||||||
|
self.name = name
|
||||||
|
self.is_local = is_local
|
||||||
|
|
||||||
|
def _resolve_ref(self, ref: str):
|
||||||
|
parts = ref.split('#')
|
||||||
|
assert len(parts) == 2, f'Unsupported ref: {ref}'
|
||||||
|
url = parts[0]
|
||||||
|
target = None
|
||||||
|
is_local = not url
|
||||||
|
if is_local:
|
||||||
|
assert self._ref_context, f'Error resolving ref {ref}: no context'
|
||||||
|
target = self._ref_context[-1]
|
||||||
|
else:
|
||||||
|
target = self._external_refs.get(url)
|
||||||
|
if target is None:
|
||||||
|
# Fetch the referenced schema and resolve its refs
|
||||||
|
target = self._fetch_json(url)
|
||||||
|
self._external_refs[url] = target
|
||||||
|
|
||||||
|
tokens = parts[1].split('/')
|
||||||
|
for sel in tokens[1:]:
|
||||||
|
assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
|
||||||
|
target = target[sel]
|
||||||
|
|
||||||
|
return self.ResolvedRef(target, tokens[-1] if tokens else '', is_local)
|
||||||
|
|
||||||
def visit(self, schema, name):
|
def visit(self, schema, name):
|
||||||
schema_type = schema.get('type')
|
schema_type = schema.get('type')
|
||||||
schema_format = schema.get('format')
|
schema_format = schema.get('format')
|
||||||
rule_name = name + '-' if name in RESERVED_NAMES else name or 'root'
|
rule_name = name + '-' if name in RESERVED_NAMES else name or 'root'
|
||||||
|
|
||||||
|
if not self._ref_context:
|
||||||
|
self._ref_context.append(schema)
|
||||||
|
try:
|
||||||
|
return self.visit(schema, name)
|
||||||
|
finally:
|
||||||
|
self._ref_context.pop()
|
||||||
|
|
||||||
if (ref := schema.get('$ref')) is not None:
|
if (ref := schema.get('$ref')) is not None:
|
||||||
return self._add_rule(rule_name, self._resolve_ref(ref))
|
resolved = self._resolve_ref(ref)
|
||||||
|
if not resolved.is_local:
|
||||||
|
self._ref_context.append(resolved.target)
|
||||||
|
try:
|
||||||
|
return self.visit(resolved.target, name if name == '' or resolved.name == '' else resolved.name)
|
||||||
|
finally:
|
||||||
|
if not resolved.is_local:
|
||||||
|
self._ref_context.pop()
|
||||||
|
|
||||||
elif 'oneOf' in schema or 'anyOf' in schema:
|
elif 'oneOf' in schema or 'anyOf' in schema:
|
||||||
return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))
|
return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))
|
||||||
|
@ -574,36 +563,6 @@ class SchemaConverter:
|
||||||
rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
|
rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
|
||||||
return self._add_rule(rule_name, rule)
|
return self._add_rule(rule_name, rule)
|
||||||
|
|
||||||
elif schema_type in (None, 'object') and \
|
|
||||||
('properties' in schema or \
|
|
||||||
('additionalProperties' in schema and schema['additionalProperties'] is not True)):
|
|
||||||
required = set(schema.get('required', []))
|
|
||||||
properties = list(schema.get('properties', {}).items())
|
|
||||||
return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties')))
|
|
||||||
|
|
||||||
elif schema_type in (None, 'object') and 'allOf' in schema:
|
|
||||||
required = set()
|
|
||||||
properties = []
|
|
||||||
hybrid_name = name
|
|
||||||
def add_component(comp_schema, is_required):
|
|
||||||
if (ref := comp_schema.get('$ref')) is not None:
|
|
||||||
comp_schema = self._refs[ref]
|
|
||||||
|
|
||||||
if 'properties' in comp_schema:
|
|
||||||
for prop_name, prop_schema in comp_schema['properties'].items():
|
|
||||||
properties.append((prop_name, prop_schema))
|
|
||||||
if is_required:
|
|
||||||
required.add(prop_name)
|
|
||||||
|
|
||||||
for t in schema['allOf']:
|
|
||||||
if 'anyOf' in t:
|
|
||||||
for tt in t['anyOf']:
|
|
||||||
add_component(tt, is_required=False)
|
|
||||||
else:
|
|
||||||
add_component(t, is_required=True)
|
|
||||||
|
|
||||||
return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
|
|
||||||
|
|
||||||
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
|
elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
|
||||||
items = schema.get('items') or schema['prefixItems']
|
items = schema.get('items') or schema['prefixItems']
|
||||||
if isinstance(items, list):
|
if isinstance(items, list):
|
||||||
|
@ -658,8 +617,44 @@ class SchemaConverter:
|
||||||
out.append(") space")
|
out.append(") space")
|
||||||
return self._add_rule(rule_name, ''.join(out))
|
return self._add_rule(rule_name, ''.join(out))
|
||||||
|
|
||||||
elif (schema_type == 'object') or (len(schema) == 0):
|
elif (schema_type == 'object') or (schema_type is None):
|
||||||
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
required = set(schema.get('required', []))
|
||||||
|
properties = list(schema.get('properties', {}).items())
|
||||||
|
is_explicit_object = schema_type == 'object' or 'properties' in schema or 'additionalProperties' in schema
|
||||||
|
additional_properties = schema.get('additionalProperties')
|
||||||
|
|
||||||
|
def add_component(comp_schema, is_required):
|
||||||
|
if (ref := comp_schema.get('$ref')) is not None:
|
||||||
|
resolved = self._resolve_ref(ref)
|
||||||
|
comp_schema = resolved.target
|
||||||
|
|
||||||
|
if 'properties' in comp_schema:
|
||||||
|
for prop_name, prop_schema in comp_schema['properties'].items():
|
||||||
|
properties.append((prop_name, prop_schema))
|
||||||
|
if is_required:
|
||||||
|
required.add(prop_name)
|
||||||
|
if 'additionalProperties' in comp_schema:
|
||||||
|
if additional_properties is None:
|
||||||
|
additional_properties = comp_schema['additionalProperties']
|
||||||
|
elif additional_properties != comp_schema['additionalProperties']:
|
||||||
|
raise ValueError('Inconsistent additionalProperties in allOf')
|
||||||
|
|
||||||
|
for t in schema.get('allOf', []):
|
||||||
|
if 'anyOf' in t:
|
||||||
|
for tt in t['anyOf']:
|
||||||
|
add_component(tt, is_required=False)
|
||||||
|
else:
|
||||||
|
add_component(t, is_required=True)
|
||||||
|
|
||||||
|
if not properties and (additional_properties == True or additional_properties is None):
|
||||||
|
return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
|
||||||
|
|
||||||
|
default_additional_properties = None if is_explicit_object else False
|
||||||
|
return self._add_rule(
|
||||||
|
rule_name,
|
||||||
|
self._build_object_rule(
|
||||||
|
properties, required, name,
|
||||||
|
additional_properties if additional_properties is not None else default_additional_properties))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
|
||||||
|
@ -800,7 +795,6 @@ def main(args_in = None):
|
||||||
allow_fetch=args.allow_fetch,
|
allow_fetch=args.allow_fetch,
|
||||||
dotall=args.dotall,
|
dotall=args.dotall,
|
||||||
raw_pattern=args.raw_pattern)
|
raw_pattern=args.raw_pattern)
|
||||||
schema = converter.resolve_refs(schema, url)
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
print(converter.format_grammar())
|
print(converter.format_grammar())
|
||||||
|
|
||||||
|
|
|
@ -26,9 +26,8 @@ const propOrder = grammarJsonSchemaPropOrder
|
||||||
|
|
||||||
let grammar = null
|
let grammar = null
|
||||||
if (grammarJsonSchemaFile) {
|
if (grammarJsonSchemaFile) {
|
||||||
let schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
|
const schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'))
|
||||||
const converter = new SchemaConverter({prop_order: propOrder, allow_fetch: true})
|
const converter = new SchemaConverter({prop_order: propOrder, allow_fetch: true})
|
||||||
schema = await converter.resolveRefs(schema, grammarJsonSchemaFile)
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
grammar = converter.formatGrammar()
|
grammar = converter.formatGrammar()
|
||||||
}
|
}
|
||||||
|
|
|
@ -558,14 +558,13 @@ const ConfigForm = (props) => {
|
||||||
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
||||||
const convertJSONSchemaGrammar = async () => {
|
const convertJSONSchemaGrammar = async () => {
|
||||||
try {
|
try {
|
||||||
let schema = JSON.parse(params.value.grammar)
|
const schema = JSON.parse(params.value.grammar)
|
||||||
const converter = new SchemaConverter({
|
const converter = new SchemaConverter({
|
||||||
prop_order: grammarJsonSchemaPropOrder.value
|
prop_order: grammarJsonSchemaPropOrder.value
|
||||||
.split(',')
|
.split(',')
|
||||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
||||||
allow_fetch: true,
|
allow_fetch: true,
|
||||||
})
|
})
|
||||||
schema = await converter.resolveRefs(schema, 'input')
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
params.value = {
|
params.value = {
|
||||||
...params.value,
|
...params.value,
|
||||||
|
|
|
@ -707,14 +707,13 @@
|
||||||
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
||||||
const convertJSONSchemaGrammar = async () => {
|
const convertJSONSchemaGrammar = async () => {
|
||||||
try {
|
try {
|
||||||
let schema = JSON.parse(params.value.grammar)
|
const schema = JSON.parse(params.value.grammar)
|
||||||
const converter = new SchemaConverter({
|
const converter = new SchemaConverter({
|
||||||
prop_order: grammarJsonSchemaPropOrder.value
|
prop_order: grammarJsonSchemaPropOrder.value
|
||||||
.split(',')
|
.split(',')
|
||||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
||||||
allow_fetch: true,
|
allow_fetch: true,
|
||||||
})
|
})
|
||||||
schema = await converter.resolveRefs(schema, 'input')
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
params.value = {
|
params.value = {
|
||||||
...params.value,
|
...params.value,
|
||||||
|
|
|
@ -268,7 +268,8 @@ export class SchemaConverter {
|
||||||
this._dotall = options.dotall || false;
|
this._dotall = options.dotall || false;
|
||||||
this._rules = {'space': SPACE_RULE};
|
this._rules = {'space': SPACE_RULE};
|
||||||
this._refs = {};
|
this._refs = {};
|
||||||
this._refsBeingResolved = new Set();
|
this._externalRefs = new Map();
|
||||||
|
this._refContext = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
_formatLiteral(literal) {
|
_formatLiteral(literal) {
|
||||||
|
@ -306,60 +307,6 @@ export class SchemaConverter {
|
||||||
return key;
|
return key;
|
||||||
}
|
}
|
||||||
|
|
||||||
async resolveRefs(schema, url) {
|
|
||||||
const visit = async (n) => {
|
|
||||||
if (Array.isArray(n)) {
|
|
||||||
return Promise.all(n.map(visit));
|
|
||||||
} else if (typeof n === 'object' && n !== null) {
|
|
||||||
let ref = n.$ref;
|
|
||||||
let target;
|
|
||||||
if (ref !== undefined && !this._refs[ref]) {
|
|
||||||
if (ref.startsWith('https://')) {
|
|
||||||
if (!this._allowFetch) {
|
|
||||||
throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
|
|
||||||
}
|
|
||||||
const fetch = (await import('node-fetch')).default;
|
|
||||||
|
|
||||||
const fragSplit = ref.split('#');
|
|
||||||
const baseUrl = fragSplit[0];
|
|
||||||
|
|
||||||
target = this._refs[baseUrl];
|
|
||||||
if (!target) {
|
|
||||||
target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
|
|
||||||
this._refs[baseUrl] = target;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
|
|
||||||
return target;
|
|
||||||
}
|
|
||||||
} else if (ref.startsWith('#/')) {
|
|
||||||
target = schema;
|
|
||||||
ref = `${url}${ref}`;
|
|
||||||
n.$ref = ref;
|
|
||||||
} else {
|
|
||||||
throw new Error(`Unsupported ref ${ref}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const selectors = ref.split('#')[1].split('/').slice(1);
|
|
||||||
for (const sel of selectors) {
|
|
||||||
if (!target || !(sel in target)) {
|
|
||||||
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
|
|
||||||
}
|
|
||||||
target = target[sel];
|
|
||||||
}
|
|
||||||
|
|
||||||
this._refs[ref] = target;
|
|
||||||
} else {
|
|
||||||
await Promise.all(Object.values(n).map(visit));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n;
|
|
||||||
};
|
|
||||||
|
|
||||||
return visit(schema);
|
|
||||||
}
|
|
||||||
|
|
||||||
_generateUnionRule(name, altSchemas) {
|
_generateUnionRule(name, altSchemas) {
|
||||||
return altSchemas
|
return altSchemas
|
||||||
.map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
|
.map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
|
||||||
|
@ -590,29 +537,69 @@ export class SchemaConverter {
|
||||||
return out.join('');
|
return out.join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
_resolveRef(ref) {
|
|
||||||
let refName = ref.split('/').pop();
|
|
||||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
|
||||||
this._refsBeingResolved.add(ref);
|
|
||||||
const resolved = this._refs[ref];
|
|
||||||
refName = this.visit(resolved, refName);
|
|
||||||
this._refsBeingResolved.delete(ref);
|
|
||||||
}
|
|
||||||
return refName;
|
|
||||||
}
|
|
||||||
|
|
||||||
_generateConstantRule(value) {
|
_generateConstantRule(value) {
|
||||||
return this._formatLiteral(JSON.stringify(value));
|
return this._formatLiteral(JSON.stringify(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_resolveRef(ref) {
|
||||||
|
const parts = ref.split('#');
|
||||||
|
if (parts.length !== 2) {
|
||||||
|
throw new Error(`Unsupported ref: ${ref}`);
|
||||||
|
}
|
||||||
|
const url = parts[0];
|
||||||
|
let target = null;
|
||||||
|
let isLocal = !url;
|
||||||
|
if (isLocal) {
|
||||||
|
if (this._refContext.length === 0) {
|
||||||
|
throw new Error(`Error resolving ref ${ref}: no context`);
|
||||||
|
}
|
||||||
|
target = this._refContext[this._refContext.length - 1];
|
||||||
|
} else {
|
||||||
|
target = this._externalRefs.get(url);
|
||||||
|
if (target === undefined) {
|
||||||
|
// Fetch the referenced schema and resolve its refs
|
||||||
|
target = this._fetchJson(url);
|
||||||
|
this._externalRefs.set(url, target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const tokens = parts[1].split('/');
|
||||||
|
for (const sel of tokens.slice(1)) {
|
||||||
|
if (target === null || !(sel in target)) {
|
||||||
|
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
|
||||||
|
}
|
||||||
|
target = target[sel];
|
||||||
|
}
|
||||||
|
const name = tokens[tokens.length - 1] || '';
|
||||||
|
return {target, name, isLocal};
|
||||||
|
}
|
||||||
|
|
||||||
visit(schema, name) {
|
visit(schema, name) {
|
||||||
const schemaType = schema.type;
|
const schemaType = schema.type;
|
||||||
const schemaFormat = schema.format;
|
const schemaFormat = schema.format;
|
||||||
const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
|
const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
|
||||||
|
|
||||||
|
if (this._refContext.length === 0) {
|
||||||
|
this._refContext.push(schema);
|
||||||
|
try {
|
||||||
|
return this.visit(schema, name);
|
||||||
|
} finally {
|
||||||
|
this._refContext.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const ref = schema.$ref;
|
const ref = schema.$ref;
|
||||||
if (ref !== undefined) {
|
if (ref !== undefined) {
|
||||||
return this._addRule(ruleName, this._resolveRef(ref));
|
const resolved = this._resolveRef(ref);
|
||||||
|
if (!resolved.isLocal) {
|
||||||
|
this._refContext.push(resolved.target);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return this.visit(resolved.target, name === '' || resolved.name === '' ? name : resolved.name);
|
||||||
|
} finally {
|
||||||
|
if (!resolved.isLocal) {
|
||||||
|
this._refContext.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (schema.oneOf || schema.anyOf) {
|
} else if (schema.oneOf || schema.anyOf) {
|
||||||
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
||||||
} else if (Array.isArray(schemaType)) {
|
} else if (Array.isArray(schemaType)) {
|
||||||
|
@ -622,42 +609,6 @@ export class SchemaConverter {
|
||||||
} else if ('enum' in schema) {
|
} else if ('enum' in schema) {
|
||||||
const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
|
const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
|
||||||
return this._addRule(ruleName, rule);
|
return this._addRule(ruleName, rule);
|
||||||
} else if ((schemaType === undefined || schemaType === 'object') &&
|
|
||||||
('properties' in schema ||
|
|
||||||
('additionalProperties' in schema && schema.additionalProperties !== true))) {
|
|
||||||
const required = new Set(schema.required || []);
|
|
||||||
const properties = Object.entries(schema.properties ?? {});
|
|
||||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
|
|
||||||
} else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
|
|
||||||
const required = new Set();
|
|
||||||
const properties = [];
|
|
||||||
const addComponent = (compSchema, isRequired) => {
|
|
||||||
const ref = compSchema.$ref;
|
|
||||||
if (ref !== undefined) {
|
|
||||||
compSchema = this._refs[ref];
|
|
||||||
}
|
|
||||||
|
|
||||||
if ('properties' in compSchema) {
|
|
||||||
for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
|
|
||||||
properties.push([propName, propSchema]);
|
|
||||||
if (isRequired) {
|
|
||||||
required.add(propName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const t of schema.allOf) {
|
|
||||||
if ('anyOf' in t) {
|
|
||||||
for (const tt of t.anyOf) {
|
|
||||||
addComponent(tt, false);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
addComponent(t, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
|
|
||||||
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
||||||
const items = schema.items ?? schema.prefixItems;
|
const items = schema.items ?? schema.prefixItems;
|
||||||
if (Array.isArray(items)) {
|
if (Array.isArray(items)) {
|
||||||
|
@ -706,8 +657,58 @@ export class SchemaConverter {
|
||||||
_generateMinMaxInt(minValue, maxValue, out);
|
_generateMinMaxInt(minValue, maxValue, out);
|
||||||
out.push(") space");
|
out.push(") space");
|
||||||
return this._addRule(ruleName, out.join(''));
|
return this._addRule(ruleName, out.join(''));
|
||||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
} else if (schemaType === undefined || schemaType === 'object') {
|
||||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
const required = new Set(schema.required || []);
|
||||||
|
const properties = Object.entries(schema.properties ?? {});
|
||||||
|
const isExplicitObject = schemaType === 'object' || 'properties' in schema || 'additionalProperties' in schema;
|
||||||
|
let additionalProperties = schema.additionalProperties;
|
||||||
|
|
||||||
|
const addComponent = (compSchema, isRequired) => {
|
||||||
|
const ref = compSchema.$ref;
|
||||||
|
if (ref !== undefined) {
|
||||||
|
const resolved = this._resolveRef(ref);
|
||||||
|
compSchema = resolved.target;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ('properties' in compSchema) {
|
||||||
|
for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
|
||||||
|
properties.push([propName, propSchema]);
|
||||||
|
if (isRequired) {
|
||||||
|
required.add(propName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ('additionalProperties' in compSchema) {
|
||||||
|
if (additionalProperties === null) {
|
||||||
|
additionalProperties = compSchema.additionalProperties;
|
||||||
|
} else if (additionalProperties !== compSchema.additionalProperties) {
|
||||||
|
throw new Error('Inconsistent additionalProperties in allOf');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if ('allOf' in schema) {
|
||||||
|
for (const t of schema.allOf) {
|
||||||
|
if ('anyOf' in t) {
|
||||||
|
for (const tt of t.anyOf) {
|
||||||
|
addComponent(tt, false);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
addComponent(t, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (properties.length === 0 && (additionalProperties === true || additionalProperties == null)) {
|
||||||
|
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||||
|
}
|
||||||
|
|
||||||
|
const defaultAdditionalProperties = isExplicitObject ? null : false;
|
||||||
|
return this._addRule(
|
||||||
|
ruleName,
|
||||||
|
this._buildObjectRule(properties, required, name, additionalProperties ?? defaultAdditionalProperties)
|
||||||
|
);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if (!(schemaType in PRIMITIVE_RULES)) {
|
if (!(schemaType in PRIMITIVE_RULES)) {
|
||||||
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
|
||||||
|
|
|
@ -634,14 +634,13 @@
|
||||||
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
||||||
const convertJSONSchemaGrammar = async () => {
|
const convertJSONSchemaGrammar = async () => {
|
||||||
try {
|
try {
|
||||||
let schema = JSON.parse(params.value.grammar)
|
const schema = JSON.parse(params.value.grammar)
|
||||||
const converter = new SchemaConverter({
|
const converter = new SchemaConverter({
|
||||||
prop_order: grammarJsonSchemaPropOrder.value
|
prop_order: grammarJsonSchemaPropOrder.value
|
||||||
.split(',')
|
.split(',')
|
||||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
||||||
allow_fetch: true,
|
allow_fetch: true,
|
||||||
})
|
})
|
||||||
schema = await converter.resolveRefs(schema, 'input')
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
params.value = {
|
params.value = {
|
||||||
...params.value,
|
...params.value,
|
||||||
|
|
|
@ -637,14 +637,13 @@
|
||||||
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value
|
||||||
const convertJSONSchemaGrammar = async () => {
|
const convertJSONSchemaGrammar = async () => {
|
||||||
try {
|
try {
|
||||||
let schema = JSON.parse(params.value.grammar)
|
const schema = JSON.parse(params.value.grammar)
|
||||||
const converter = new SchemaConverter({
|
const converter = new SchemaConverter({
|
||||||
prop_order: grammarJsonSchemaPropOrder.value
|
prop_order: grammarJsonSchemaPropOrder.value
|
||||||
.split(',')
|
.split(',')
|
||||||
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
.reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
|
||||||
allow_fetch: true,
|
allow_fetch: true,
|
||||||
})
|
})
|
||||||
schema = await converter.resolveRefs(schema, 'input')
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
params.value = {
|
params.value = {
|
||||||
...params.value,
|
...params.value,
|
||||||
|
|
|
@ -183,10 +183,9 @@ space ::= | " " | "\n" [ \t]{0,20}
|
||||||
Here is also a list of known limitations (contributions welcome):
|
Here is also a list of known limitations (contributions welcome):
|
||||||
|
|
||||||
- Unsupported features are skipped silently. It is currently advised to use the command-line Python converter (see above) to see any warnings, and to inspect the resulting grammar / test it w/ [llama-gbnf-validator](../examples/gbnf-validator/gbnf-validator.cpp).
|
- Unsupported features are skipped silently. It is currently advised to use the command-line Python converter (see above) to see any warnings, and to inspect the resulting grammar / test it w/ [llama-gbnf-validator](../examples/gbnf-validator/gbnf-validator.cpp).
|
||||||
- Can't mix `properties` w/ `anyOf` / `oneOf` in the same type (https://github.com/ggerganov/llama.cpp/issues/7703)
|
- Can't mix `properties` w/ `oneOf` in the same type (https://github.com/ggerganov/llama.cpp/issues/7703)
|
||||||
- [prefixItems](https://json-schema.org/draft/2020-12/json-schema-core#name-prefixitems) is broken (but [items](https://json-schema.org/draft/2020-12/json-schema-core#name-items) works)
|
- [prefixItems](https://json-schema.org/draft/2020-12/json-schema-core#name-prefixitems) is broken (but [items](https://json-schema.org/draft/2020-12/json-schema-core#name-items) works)
|
||||||
- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum`: only supported for `"type": "integer"` for now, not `number`
|
- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum`: only supported for `"type": "integer"` for now, not `number`
|
||||||
- Nested `$ref`s are broken (https://github.com/ggerganov/llama.cpp/issues/8073)
|
|
||||||
- [pattern](https://json-schema.org/draft/2020-12/json-schema-validation#name-pattern)s must start with `^` and end with `$`
|
- [pattern](https://json-schema.org/draft/2020-12/json-schema-validation#name-pattern)s must start with `^` and end with `$`
|
||||||
- Remote `$ref`s not supported in the C++ version (Python & JavaScript versions fetch https refs)
|
- Remote `$ref`s not supported in the C++ version (Python & JavaScript versions fetch https refs)
|
||||||
- `string` [formats](https://json-schema.org/draft/2020-12/json-schema-validation#name-defined-formats) lack `uri`, `email`
|
- `string` [formats](https://json-schema.org/draft/2020-12/json-schema-validation#name-defined-formats) lack `uri`, `email`
|
||||||
|
|
|
@ -3,8 +3,7 @@ import { SchemaConverter } from "../examples/server/public/json-schema-to-gramma
|
||||||
|
|
||||||
const [, , file] = process.argv
|
const [, , file] = process.argv
|
||||||
const url = `file://${file}`
|
const url = `file://${file}`
|
||||||
let schema = JSON.parse(readFileSync(file, "utf8"));
|
const schema = JSON.parse(readFileSync(file, "utf8"));
|
||||||
const converter = new SchemaConverter({})
|
const converter = new SchemaConverter({})
|
||||||
schema = await converter.resolveRefs(schema, url)
|
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
console.log(converter.formatGrammar())
|
console.log(converter.formatGrammar())
|
||||||
|
|
|
@ -1266,6 +1266,48 @@ static void test_json_schema() {
|
||||||
// R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
|
// R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
test_schema(
|
||||||
|
"refs",
|
||||||
|
// Schema
|
||||||
|
R"""({
|
||||||
|
"type": "array",
|
||||||
|
"minItems": 1,
|
||||||
|
"maxItems": 15,
|
||||||
|
"items": { "$ref": "#/$defs/TALK" },
|
||||||
|
|
||||||
|
"$defs": {
|
||||||
|
"characters": { "enum": ["Biff", "Alice"] },
|
||||||
|
"emotes": { "enum": ["EXCLAMATION", "CONFUSION", "CHEERFUL", "LOVE", "ANGRY"] },
|
||||||
|
|
||||||
|
"TALK": {
|
||||||
|
"type": "object",
|
||||||
|
"required": [ "character", "emote", "dialog" ],
|
||||||
|
"properties": {
|
||||||
|
"character": { "$ref": "#/$defs/characters" },
|
||||||
|
"emote": { "$ref": "#/$defs/emotes" },
|
||||||
|
"dialog": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1,
|
||||||
|
"maxLength": 200
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})""",
|
||||||
|
// Passing strings
|
||||||
|
{
|
||||||
|
R"""([{
|
||||||
|
"character": "Alice",
|
||||||
|
"emote": "EXCLAMATION",
|
||||||
|
"dialog": "Hello, world!"
|
||||||
|
}])""",
|
||||||
|
},
|
||||||
|
// Failing strings
|
||||||
|
{
|
||||||
|
}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
|
|
@ -344,6 +344,48 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"nested $refs (https://github.com/ggerganov/llama.cpp/issues/8073)",
|
||||||
|
R"""({
|
||||||
|
"type": "array",
|
||||||
|
"minItems": 15,
|
||||||
|
"maxItems": 15,
|
||||||
|
"items": { "$ref": "#/$defs/talk" },
|
||||||
|
|
||||||
|
"$defs": {
|
||||||
|
"characters": { "enum": ["Biff", "Alice"] },
|
||||||
|
"emotes": { "enum": ["EXCLAMATION", "CONFUSION", "CHEERFUL", "LOVE", "ANGRY"] },
|
||||||
|
"talk": {
|
||||||
|
"type": "object",
|
||||||
|
"required": [ "character", "emote", "dialog" ],
|
||||||
|
"properties": {
|
||||||
|
"character": { "$ref": "#/$defs/characters" },
|
||||||
|
"emote": { "$ref": "#/$defs/emotes" },
|
||||||
|
"dialog": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 1,
|
||||||
|
"maxLength": 200
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||||
|
characters ::= ("\"Biff\"" | "\"Alice\"") space
|
||||||
|
emotes ::= ("\"EXCLAMATION\"" | "\"CONFUSION\"" | "\"CHEERFUL\"" | "\"LOVE\"" | "\"ANGRY\"") space
|
||||||
|
root ::= "[" space talk ("," space talk){14,14} "]" space
|
||||||
|
space ::= | " " | "\n" [ \t]{0,20}
|
||||||
|
talk ::= "{" space talk-character-kv "," space talk-emote-kv "," space talk-dialog-kv "}" space
|
||||||
|
talk-character-kv ::= "\"character\"" space ":" space characters
|
||||||
|
talk-dialog ::= "\"" char{1,200} "\"" space
|
||||||
|
talk-dialog-kv ::= "\"dialog\"" space ":" space talk-dialog
|
||||||
|
talk-emote-kv ::= "\"emote\"" space ":" space emotes
|
||||||
|
)""",
|
||||||
|
});
|
||||||
|
|
||||||
test({
|
test({
|
||||||
SUCCESS,
|
SUCCESS,
|
||||||
"exotic formats",
|
"exotic formats",
|
||||||
|
@ -1090,10 +1132,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
}
|
}
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
|
a-kv ::= "\"a\"" space ":" space string
|
||||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||||
foo ::= "{" space foo-a-kv "}" space
|
root ::= "{" space a-kv "}" space
|
||||||
foo-a-kv ::= "\"a\"" space ":" space string
|
|
||||||
root ::= foo
|
|
||||||
space ::= | " " | "\n" [ \t]{0,20}
|
space ::= | " " | "\n" [ \t]{0,20}
|
||||||
string ::= "\"" char* "\"" space
|
string ::= "\"" char* "\"" space
|
||||||
)"""
|
)"""
|
||||||
|
@ -1109,39 +1150,27 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
],
|
],
|
||||||
"definitions": {
|
"definitions": {
|
||||||
"foo": {
|
"foo": {
|
||||||
"properties": {"a": {"type": "number"}}
|
"properties": {"a": {"type": "number"}},
|
||||||
|
"additionalProperties": false
|
||||||
},
|
},
|
||||||
"bar": {
|
"bar": {
|
||||||
"properties": {"b": {"type": "number"}}
|
"properties": {"b": {"type": "number"}},
|
||||||
|
"additionalProperties": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object"
|
"type": "object",
|
||||||
|
"additionalProperties": false
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
alternative-0 ::= foo
|
bar ::= "{" space (bar-b-kv )? "}" space
|
||||||
alternative-1 ::= bar
|
|
||||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
|
||||||
bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space
|
|
||||||
bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space
|
|
||||||
bar-additional-kv ::= bar-additional-k ":" space value
|
|
||||||
bar-b-kv ::= "\"b\"" space ":" space number
|
bar-b-kv ::= "\"b\"" space ":" space number
|
||||||
bar-b-rest ::= ( "," space bar-additional-kv )*
|
|
||||||
boolean ::= ("true" | "false") space
|
|
||||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
|
||||||
decimal-part ::= [0-9]{1,16}
|
decimal-part ::= [0-9]{1,16}
|
||||||
foo ::= "{" space (foo-a-kv foo-a-rest | foo-additional-kv ( "," space foo-additional-kv )* )? "}" space
|
foo ::= "{" space (foo-a-kv )? "}" space
|
||||||
foo-a-kv ::= "\"a\"" space ":" space number
|
foo-a-kv ::= "\"a\"" space ":" space number
|
||||||
foo-a-rest ::= ( "," space foo-additional-kv )*
|
|
||||||
foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
|
|
||||||
foo-additional-kv ::= foo-additional-k ":" space value
|
|
||||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||||
null ::= "null" space
|
|
||||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
root ::= foo | bar
|
||||||
root ::= alternative-0 | alternative-1
|
|
||||||
space ::= | " " | "\n" [ \t]{0,20}
|
space ::= | " " | "\n" [ \t]{0,20}
|
||||||
string ::= "\"" char* "\"" space
|
|
||||||
value ::= object | array | string | number | boolean | null
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1173,29 +1202,19 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
||||||
"properties": {"d": {"type": "number"}}
|
"properties": {"d": {"type": "number"}}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object"
|
"additionalProperties": false
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
a-kv ::= "\"a\"" space ":" space number
|
a-kv ::= "\"a\"" space ":" space number
|
||||||
additional-k ::= ["] ( [a] char+ | [b] char+ | [c] char+ | [d] char+ | [^"abcd] char* )? ["] space
|
|
||||||
additional-kv ::= additional-k ":" space value
|
|
||||||
array ::= "[" space ( value ("," space value)* )? "]" space
|
|
||||||
b-kv ::= "\"b\"" space ":" space number
|
b-kv ::= "\"b\"" space ":" space number
|
||||||
boolean ::= ("true" | "false") space
|
|
||||||
c-kv ::= "\"c\"" space ":" space number
|
c-kv ::= "\"c\"" space ":" space number
|
||||||
c-rest ::= ( "," space additional-kv )*
|
|
||||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
|
||||||
d-kv ::= "\"d\"" space ":" space number
|
d-kv ::= "\"d\"" space ":" space number
|
||||||
d-rest ::= ( "," space c-kv )? c-rest
|
d-rest ::= ( "," space c-kv )?
|
||||||
decimal-part ::= [0-9]{1,16}
|
decimal-part ::= [0-9]{1,16}
|
||||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||||
null ::= "null" space
|
|
||||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
|
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
|
||||||
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv c-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
|
|
||||||
space ::= | " " | "\n" [ \t]{0,20}
|
space ::= | " " | "\n" [ \t]{0,20}
|
||||||
string ::= "\"" char* "\"" space
|
|
||||||
value ::= object | array | string | number | boolean | null
|
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue