json: space before & refs for consistency

This commit is contained in:
ochafik 2024-03-21 10:07:47 +00:00
parent ad6c4755e0
commit c26e7b87ce
2 changed files with 54 additions and 54 deletions

View file

@ -42,7 +42,7 @@ std::unordered_map<std::string, std::string> DATE_RULES = {
{"date-time-string", "\"\\\"\" date-time \"\\\"\" space"} {"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
}; };
static bool is_reserved_name(const std::string& name) { static bool is_reserved_name(const std::string & name) {
static std::unordered_set<std::string> RESERVED_NAMES; static std::unordered_set<std::string> RESERVED_NAMES;
if (RESERVED_NAMES.empty()) { if (RESERVED_NAMES.empty()) {
RESERVED_NAMES.insert("root"); RESERVED_NAMES.insert("root");
@ -63,7 +63,7 @@ std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
template <typename Iterator> template <typename Iterator>
std::string join(Iterator begin, Iterator end, const std::string& separator) { std::string join(Iterator begin, Iterator end, const std::string & separator) {
std::ostringstream result; std::ostringstream result;
if (begin != end) { if (begin != end) {
result << *begin; result << *begin;
@ -74,7 +74,7 @@ std::string join(Iterator begin, Iterator end, const std::string& separator) {
return result.str(); return result.str();
} }
static std::vector<std::string> split(const std::string& str, const std::string& delimiter) { static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
std::vector<std::string> tokens; std::vector<std::string> tokens;
size_t start = 0; size_t start = 0;
size_t end = str.find(delimiter); size_t end = str.find(delimiter);
@ -90,7 +90,7 @@ static std::vector<std::string> split(const std::string& str, const std::string&
return tokens; return tokens;
} }
static std::string repeat(const std::string& str, size_t n) { static std::string repeat(const std::string & str, size_t n) {
if (n == 0) { if (n == 0) {
return ""; return "";
} }
@ -105,7 +105,7 @@ static std::string repeat(const std::string& str, size_t n) {
return result; return result;
} }
static std::string replacePattern(const std::string& input, const std::regex& regex, const std::function<std::string(const std::smatch &)>& replacement) { static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
std::smatch match; std::smatch match;
std::string result; std::string result;
@ -123,8 +123,8 @@ static std::string replacePattern(const std::string& input, const std::regex& re
return result; return result;
} }
static std::string format_literal(const std::string& literal) { static std::string format_literal(const std::string & literal) {
std::string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch& match) { std::string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
char c = match.str()[0]; char c = match.str()[0];
return GRAMMAR_LITERAL_ESCAPES.at(c); return GRAMMAR_LITERAL_ESCAPES.at(c);
}); });
@ -134,7 +134,7 @@ static std::string format_literal(const std::string& literal) {
class SchemaConverter { class SchemaConverter {
private: private:
std::function<json(const std::string&)> _fetch_json; std::function<json(const std::string &)> _fetch_json;
bool _dotall; bool _dotall;
std::map<std::string, std::string> _rules; std::map<std::string, std::string> _rules;
std::unordered_map<std::string, nlohmann::json> _refs; std::unordered_map<std::string, nlohmann::json> _refs;
@ -142,7 +142,7 @@ private:
std::vector<std::string> _errors; std::vector<std::string> _errors;
std::vector<std::string> _warnings; std::vector<std::string> _warnings;
std::string _add_rule(const std::string& name, const std::string& rule) { std::string _add_rule(const std::string & name, const std::string & rule) {
std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-"); std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
_rules[esc_name] = rule; _rules[esc_name] = rule;
@ -158,7 +158,7 @@ private:
} }
} }
std::string _generate_union_rule(const std::string& name, const std::vector<json>& alt_schemas) { std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
std::vector<std::string> rules; std::vector<std::string> rules;
for (size_t i = 0; i < alt_schemas.size(); i++) { for (size_t i = 0; i < alt_schemas.size(); i++) {
rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
@ -166,7 +166,7 @@ private:
return join(rules.begin(), rules.end(), " | "); return join(rules.begin(), rules.end(), " | ");
} }
std::string _visit_pattern(const std::string& pattern, const std::string& name) { std::string _visit_pattern(const std::string & pattern, const std::string & name) {
if (!(pattern.front() == '^' && pattern.back() == '$')) { if (!(pattern.front() == '^' && pattern.back() == '$')) {
_errors.push_back("Pattern must start with '^' and end with '$'"); _errors.push_back("Pattern must start with '^' and end with '$'");
return ""; return "";
@ -178,7 +178,7 @@ private:
size_t length = sub_pattern.length(); size_t length = sub_pattern.length();
using literal_or_rule = std::pair<std::string, bool>; using literal_or_rule = std::pair<std::string, bool>;
auto to_rule = [&](const literal_or_rule& ls) { auto to_rule = [&](const literal_or_rule & ls) {
auto is_literal = ls.second; auto is_literal = ls.second;
auto s = ls.first; auto s = ls.first;
return is_literal ? "\"" + s + "\"" : s; return is_literal ? "\"" + s + "\"" : s;
@ -211,7 +211,7 @@ private:
return true; return true;
}; };
for (const auto& item : seq) { for (const auto & item : seq) {
auto is_literal = item.second; auto is_literal = item.second;
if (is_literal) { if (is_literal) {
literal += item.first; literal += item.first;
@ -223,7 +223,7 @@ private:
flush_literal(); flush_literal();
std::vector<std::string> results; std::vector<std::string> results;
for (const auto& item : ret) { for (const auto & item : ret) {
results.push_back(to_rule(item)); results.push_back(to_rule(item));
} }
return std::make_pair(join(results.begin(), results.end(), " "), false); return std::make_pair(join(results.begin(), results.end(), " "), false);
@ -300,7 +300,7 @@ private:
max_times = std::stoi(nums[1]); max_times = std::stoi(nums[1]);
} }
} }
} catch (const std::invalid_argument& e) { } catch (const std::invalid_argument & e) {
_errors.push_back("Invalid number in curly brackets"); _errors.push_back("Invalid number in curly brackets");
return std::make_pair("", false); return std::make_pair("", false);
} }
@ -316,7 +316,7 @@ private:
sub += "+"; sub += "+";
} else { } else {
if (!sub_is_literal) { if (!sub_is_literal) {
std::string& sub_id = sub_rule_ids[sub]; std::string & sub_id = sub_rule_ids[sub];
if (sub_id.empty()) { if (sub_id.empty()) {
sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub);
} }
@ -386,7 +386,7 @@ private:
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
} }
std::string _resolve_ref(const std::string& ref) { std::string _resolve_ref(const std::string & ref) {
std::string ref_name = ref.substr(ref.find_last_of('/') + 1); std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
_refs_being_resolved.insert(ref); _refs_being_resolved.insert(ref);
@ -398,15 +398,15 @@ private:
} }
std::string _build_object_rule( std::string _build_object_rule(
const std::vector<std::pair<std::string, json>>& properties, const std::vector<std::pair<std::string, json>> & properties,
const std::unordered_set<std::string>& required, const std::unordered_set<std::string> & required,
const std::string& name, const std::string & name,
const json& additional_properties) const json & additional_properties)
{ {
std::vector<std::string> required_props; std::vector<std::string> required_props;
std::vector<std::string> optional_props; std::vector<std::string> optional_props;
std::unordered_map<std::string, std::string> prop_kv_rule_names; std::unordered_map<std::string, std::string> prop_kv_rule_names;
for (const auto& kv : properties) { for (const auto & kv : properties) {
const auto &prop_name = kv.first; const auto &prop_name = kv.first;
const auto &prop_schema = kv.second; const auto &prop_schema = kv.second;
@ -443,7 +443,7 @@ private:
rule += " \",\" space ( "; rule += " \",\" space ( ";
} }
std::function<std::string(const std::vector<std::string>&, bool)> get_recursive_refs = [&](const std::vector<std::string>& ks, bool first_is_optional) { std::function<std::string(const std::vector<std::string>&, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
std::string res; std::string res;
if (ks.empty()) { if (ks.empty()) {
return res; return res;
@ -488,22 +488,22 @@ private:
public: public:
SchemaConverter( SchemaConverter(
const std::function<json(const std::string&)>& fetch_json, const std::function<json(const std::string &)> & fetch_json,
bool dotall) bool dotall)
: _fetch_json(fetch_json), _dotall(dotall) : _fetch_json(fetch_json), _dotall(dotall)
{ {
_rules["space"] = SPACE_RULE; _rules["space"] = SPACE_RULE;
} }
void resolve_refs(nlohmann::json& schema, const std::string& url) { void resolve_refs(nlohmann::json & schema, const std::string & url) {
/* /*
* Resolves all $ref fields in the given schema, fetching any remote schemas, * Resolves all $ref fields in the given schema, fetching any remote schemas,
* replacing each $ref with absolute reference URL and populates _refs with the * replacing each $ref with absolute reference URL and populates _refs with the
* respective referenced (sub)schema dictionaries. * respective referenced (sub)schema dictionaries.
*/ */
std::function<void(json&)> visit_refs = [&](json& n) { std::function<void(json &)> visit_refs = [&](json & n) {
if (n.is_array()) { if (n.is_array()) {
for (auto& x : n) { for (auto & x : n) {
visit_refs(x); visit_refs(x);
} }
} else if (n.is_object()) { } else if (n.is_object()) {
@ -546,7 +546,7 @@ public:
_refs[ref] = target; _refs[ref] = target;
} }
} else { } else {
for (auto& kv : n.items()) { for (auto & kv : n.items()) {
visit_refs(kv.value()); visit_refs(kv.value());
} }
} }
@ -556,7 +556,7 @@ public:
visit_refs(schema); visit_refs(schema);
} }
std::string _generate_constant_rule(const json& value) { std::string _generate_constant_rule(const json & value) {
if (!value.is_string()) { if (!value.is_string()) {
_errors.push_back("Only std::string constants are supported, got " + value.dump()); _errors.push_back("Only std::string constants are supported, got " + value.dump());
return ""; return "";
@ -564,7 +564,7 @@ public:
return format_literal(value.get<std::string>()); return format_literal(value.get<std::string>());
} }
std::string visit(const json& schema, const std::string& name) { std::string visit(const json & schema, const std::string & name) {
json schema_type = schema.contains("type") ? schema["type"] : json(); json schema_type = schema.contains("type") ? schema["type"] : json();
std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : ""; std::string schema_format = schema.contains("format") ? schema["format"].get<std::string>() : "";
std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name; std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name;
@ -576,7 +576,7 @@ public:
return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
} else if (schema_type.is_array()) { } else if (schema_type.is_array()) {
std::vector<json> schema_types; std::vector<json> schema_types;
for (const auto& t : schema_type) { for (const auto & t : schema_type) {
schema_types.push_back({{"type", t}}); schema_types.push_back({{"type", t}});
} }
return _add_rule(rule_name, _generate_union_rule(name, schema_types)); return _add_rule(rule_name, _generate_union_rule(name, schema_types));
@ -584,7 +584,7 @@ public:
return _add_rule(rule_name, _generate_constant_rule(schema["const"])); return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
} else if (schema.contains("enum")) { } else if (schema.contains("enum")) {
std::vector<std::string> enum_values; std::vector<std::string> enum_values;
for (const auto& v : schema["enum"]) { for (const auto & v : schema["enum"]) {
enum_values.push_back(_generate_constant_rule(v)); enum_values.push_back(_generate_constant_rule(v));
} }
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
@ -593,7 +593,7 @@ public:
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
std::unordered_set<std::string> required; std::unordered_set<std::string> required;
if (schema.contains("required") && schema["required"].is_array()) { if (schema.contains("required") && schema["required"].is_array()) {
for (const auto& item : schema["required"]) { for (const auto & item : schema["required"]) {
if (item.is_string()) { if (item.is_string()) {
required.insert(item.get<std::string>()); required.insert(item.get<std::string>());
} }
@ -601,7 +601,7 @@ public:
} }
std::vector<std::pair<std::string, json>> properties; std::vector<std::pair<std::string, json>> properties;
if (schema.contains("properties")) { if (schema.contains("properties")) {
for (const auto& prop : schema["properties"].items()) { for (const auto & prop : schema["properties"].items()) {
properties.emplace_back(prop.key(), prop.value()); properties.emplace_back(prop.key(), prop.value());
} }
} }
@ -613,11 +613,11 @@ public:
std::unordered_set<std::string> required; std::unordered_set<std::string> required;
std::vector<std::pair<std::string, json>> properties; std::vector<std::pair<std::string, json>> properties;
std::string hybrid_name = name; std::string hybrid_name = name;
std::function<void(const json&, bool)> add_component = [&](const json& comp_schema, bool is_required) { std::function<void(const json&, bool)> add_component = [&](const json & comp_schema, bool is_required) {
if (comp_schema.contains("$ref")) { if (comp_schema.contains("$ref")) {
add_component(_refs[comp_schema["$ref"]], is_required); add_component(_refs[comp_schema["$ref"]], is_required);
} else if (comp_schema.contains("properties")) { } else if (comp_schema.contains("properties")) {
for (const auto& prop : comp_schema["properties"].items()) { for (const auto & prop : comp_schema["properties"].items()) {
properties.emplace_back(prop.key(), prop.value()); properties.emplace_back(prop.key(), prop.value());
if (is_required) { if (is_required) {
required.insert(prop.key()); required.insert(prop.key());
@ -627,9 +627,9 @@ public:
// todo warning // todo warning
} }
}; };
for (auto& t : schema["allOf"]) { for (auto & t : schema["allOf"]) {
if (t.contains("anyOf")) { if (t.contains("anyOf")) {
for (auto& tt : t["anyOf"]) { for (auto & tt : t["anyOf"]) {
add_component(tt, false); add_component(tt, false);
} }
} else { } else {
@ -678,12 +678,12 @@ public:
} else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) { } else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
for (const auto& kv : DATE_RULES) { for (const auto & kv : DATE_RULES) {
_add_rule(kv.first, kv.second); _add_rule(kv.first, kv.second);
} }
return schema_format + "-string"; return schema_format + "-string";
} else if (schema.empty() || schema_type == "object") { } else if (schema.empty() || schema_type == "object") {
for (const auto& n : OBJECT_RULE_NAMES) { for (const auto & n : OBJECT_RULE_NAMES) {
_add_rule(n, PRIMITIVE_RULES.at(n)); _add_rule(n, PRIMITIVE_RULES.at(n));
} }
return _add_rule(rule_name, "object"); return _add_rule(rule_name, "object");
@ -708,15 +708,15 @@ public:
std::string format_grammar() { std::string format_grammar() {
std::stringstream ss; std::stringstream ss;
for (const auto& kv : _rules) { for (const auto & kv : _rules) {
ss << kv.first << " ::= " << kv.second << std::endl; ss << kv.first << " ::= " << kv.second << std::endl;
} }
return ss.str(); return ss.str();
} }
}; };
std::string json_schema_to_grammar(const json& schema) { std::string json_schema_to_grammar(const json & schema) {
SchemaConverter converter([](const std::string&) { return json::object(); }, /* dotall= */ false); SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
auto copy = schema; auto copy = schema;
converter.resolve_refs(copy, "input"); converter.resolve_refs(copy, "input");
converter.visit(copy, ""); converter.visit(copy, "");

View file

@ -30,7 +30,7 @@ struct TestCase {
void _print_failure_header() const { void _print_failure_header() const {
fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str()); fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
} }
void verify(const std::string& actual_grammar) const { void verify(const std::string & actual_grammar) const {
if (trim(actual_grammar) != trim(expected_grammar)) { if (trim(actual_grammar) != trim(expected_grammar)) {
_print_failure_header(); _print_failure_header();
fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str()); fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
@ -43,7 +43,7 @@ struct TestCase {
if (state.symbol_ids.find("root") == state.symbol_ids.end()) { if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar); throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
} }
} catch (const std::runtime_error& ex) { } catch (const std::runtime_error & ex) {
_print_failure_header(); _print_failure_header();
fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what()); fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
assert(false); assert(false);
@ -59,22 +59,22 @@ struct TestCase {
} }
}; };
static void write(const std::string& file, const std::string& content) { static void write(const std::string & file, const std::string & content) {
std::ofstream f; std::ofstream f;
f.open(file.c_str()); f.open(file.c_str());
f << content.c_str(); f << content.c_str();
f.close(); f.close();
} }
static std::string read(const std::string& file) { static std::string read(const std::string & file) {
std::ostringstream actuals; std::ostringstream actuals;
actuals << std::ifstream(file.c_str()).rdbuf(); actuals << std::ifstream(file.c_str()).rdbuf();
return actuals.str(); return actuals.str();
} }
static void test_all(const std::string& lang, std::function<void(const TestCase&)> runner) { static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str()); fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
auto test = [&](const TestCase& tc) { auto test = [&](const TestCase & tc) {
fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : ""); fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
runner(tc); runner(tc);
}; };
@ -794,29 +794,29 @@ static void test_all(const std::string& lang, std::function<void(const TestCase&
} }
int main() { int main() {
test_all("C++", [](const TestCase& tc) { test_all("C++", [](const TestCase & tc) {
try { try {
tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema))); tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
tc.verify_status(SUCCESS); tc.verify_status(SUCCESS);
} catch (const std::runtime_error& ex) { } catch (const std::runtime_error & ex) {
fprintf(stderr, "Error: %s\n", ex.what()); fprintf(stderr, "Error: %s\n", ex.what());
tc.verify_status(FAILURE); tc.verify_status(FAILURE);
} }
}); });
test_all("Python", [](const TestCase& tc) { test_all("Python", [](const TestCase & tc) {
write("test-json-schema-input.tmp", tc.schema); write("test-json-schema-input.tmp", tc.schema);
tc.verify_status(std::system( tc.verify_status(std::system(
"python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
tc.verify(read("test-grammar-output.tmp")); tc.verify(read("test-grammar-output.tmp"));
}); });
test_all("JavaScript", [](const TestCase& tc) { test_all("JavaScript", [](const TestCase & tc) {
write("test-json-schema-input.tmp", tc.schema); write("test-json-schema-input.tmp", tc.schema);
tc.verify_status(std::system( tc.verify_status(std::system(
"node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
tc.verify(read("test-grammar-output.tmp")); tc.verify(read("test-grammar-output.tmp"));
}); });
test_all("Check Expectations Validity", [](const TestCase& tc) { test_all("Check Expectations Validity", [](const TestCase & tc) {
if (tc.expected_status == SUCCESS) { if (tc.expected_status == SUCCESS) {
tc.verify_expectation_parseable(); tc.verify_expectation_parseable();
} }