json: test (& simplify output of) empty schema

This commit is contained in:
ochafik 2024-03-17 21:51:10 +00:00
parent 5c50ffaeac
commit 84e383c1d7
4 changed files with 30 additions and 10 deletions

View file

@ -862,7 +862,7 @@ tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.h
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

View file

@ -33,6 +33,7 @@ unordered_map<string, string> PRIMITIVE_RULES = {
" )* \"\\\"\" space"}, " )* \"\\\"\" space"},
{"null", "\"null\" space"} {"null", "\"null\" space"}
}; };
vector<string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null"};
unordered_map<string, string> DATE_RULES = { unordered_map<string, string> DATE_RULES = {
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( [0-2] [0-9] | \"3\" [0-1] )"}, {"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( [0-2] [0-9] | \"3\" [0-1] )"},
@ -663,10 +664,10 @@ public:
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
return _visit_pattern(schema["pattern"], rule_name); return _visit_pattern(schema["pattern"], rule_name);
} else if (schema.empty() || (schema.size() == 1 && schema_type == "object")) { } else if (schema.empty() || (schema.size() == 1 && schema_type == "object")) {
for (const auto& kv : PRIMITIVE_RULES) { for (const auto& n : OBJECT_RULE_NAMES) {
_add_rule(kv.first, kv.second); _add_rule(n, PRIMITIVE_RULES.at(n));
} }
return "object"; return _add_rule(rule_name, "object");
} else if ((schema_type.is_null() || schema_type == "string") && regex_match(schema_format, regex("^uuid[1-5]?$"))) { } else if ((schema_type.is_null() || schema_type == "string") && regex_match(schema_format, regex("^uuid[1-5]?$"))) {
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) { } else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {

View file

@ -15,6 +15,7 @@ const PRIMITIVE_RULES = {
)* "\\"" space`, )* "\\"" space`,
null: '"null" space', null: '"null" space',
}; };
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null'];
// TODO: support "uri", "email" string formats // TODO: support "uri", "email" string formats
const DATE_RULES = { const DATE_RULES = {
@ -426,12 +427,11 @@ export class SchemaConverter {
this._addRule(t, r); this._addRule(t, r);
} }
return schemaFormat + '-string'; return schemaFormat + '-string';
} else if (schemaType === 'object' && Object.keys(schema).length === 1 || schemaType === undefined && Object.keys(schema).length === 0) { } else if ((schemaType === 'object' && Object.keys(schema).length === 1) || (Object.keys(schema).length === 0)) {
// This depends on all primitive types for (const n of OBJECT_RULE_NAMES) {
for (const [t, r] of Object.entries(PRIMITIVE_RULES)) { this._addRule(n, PRIMITIVE_RULES[n]);
this._addRule(t, r);
} }
return 'object'; return this._addRule(ruleName, 'object');
} else { } else {
if (!(schemaType in PRIMITIVE_RULES)) { if (!(schemaType in PRIMITIVE_RULES)) {
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`); throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);

View file

@ -68,7 +68,7 @@ static string read(const string& file) {
} }
static void test_all(const string& lang, std::function<void(const TestCase&)> runner) { static void test_all(const string& lang, std::function<void(const TestCase&)> runner) {
cerr << "Testing JSON schema conversion (" << lang.c_str() << ")" << endl; cerr << "#\n# Testing JSON schema conversion (" << lang.c_str() << ")\n#" << endl;
auto test = [&](const TestCase& tc) { auto test = [&](const TestCase& tc) {
cerr << "- " << tc.name.c_str() << (tc.expected_status == FAILURE ? " (failure expected)" : "") << endl; cerr << "- " << tc.name.c_str() << (tc.expected_status == FAILURE ? " (failure expected)" : "") << endl;
runner(tc); runner(tc);
@ -92,6 +92,25 @@ static void test_all(const string& lang, std::function<void(const TestCase&)> ru
"" ""
}); });
test({
SUCCESS,
"empty schema (object)",
"{}",
R"""(
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
null ::= "null" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
test({ test({
SUCCESS, SUCCESS,
"exotic formats", "exotic formats",