json: test (& simplify output of) empty schema

This commit is contained in:
ochafik 2024-03-17 21:51:10 +00:00
parent 5c50ffaeac
commit 84e383c1d7
4 changed files with 30 additions and 10 deletions

View file

@ -862,7 +862,7 @@ tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.h
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

View file

@ -33,6 +33,7 @@ unordered_map<string, string> PRIMITIVE_RULES = {
" )* \"\\\"\" space"},
{"null", "\"null\" space"}
};
// Names of the PRIMITIVE_RULES entries that get registered (via _add_rule) when the
// schema is empty, or has a single entry and schema_type is "object".
// Every name listed here must be a key of PRIMITIVE_RULES: each is looked up with
// PRIMITIVE_RULES.at(name), which throws for a missing key.
// NOTE(review): the "array" and "object" rule bodies in the test's expected grammar
// reference `value`, which is not listed here — confirm whether it should be.
vector<string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null"};
unordered_map<string, string> DATE_RULES = {
{"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( [0-2] [0-9] | \"3\" [0-1] )"},
@ -663,10 +664,10 @@ public:
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
return _visit_pattern(schema["pattern"], rule_name);
} else if (schema.empty() || (schema.size() == 1 && schema_type == "object")) {
for (const auto& kv : PRIMITIVE_RULES) {
_add_rule(kv.first, kv.second);
for (const auto& n : OBJECT_RULE_NAMES) {
_add_rule(n, PRIMITIVE_RULES.at(n));
}
return "object";
return _add_rule(rule_name, "object");
} else if ((schema_type.is_null() || schema_type == "string") && regex_match(schema_format, regex("^uuid[1-5]?$"))) {
return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
} else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {

View file

@ -15,6 +15,7 @@ const PRIMITIVE_RULES = {
)* "\\"" space`,
null: '"null" space',
};
// Names of the PRIMITIVE_RULES entries that get registered (via this._addRule) when the
// schema has no keys, or has exactly one key and schemaType is 'object'.
// Each name is used as an index into PRIMITIVE_RULES, so it must be a key of that table.
// NOTE(review): the `array` and `object` rule bodies in the test's expected grammar
// reference `value`, which is not listed here — confirm whether it should be.
const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null'];
// TODO: support "uri", "email" string formats
const DATE_RULES = {
@ -426,12 +427,11 @@ export class SchemaConverter {
this._addRule(t, r);
}
return schemaFormat + '-string';
} else if (schemaType === 'object' && Object.keys(schema).length === 1 || schemaType === undefined && Object.keys(schema).length === 0) {
// This depends on all primitive types
for (const [t, r] of Object.entries(PRIMITIVE_RULES)) {
this._addRule(t, r);
} else if ((schemaType === 'object' && Object.keys(schema).length === 1) || (Object.keys(schema).length === 0)) {
for (const n of OBJECT_RULE_NAMES) {
this._addRule(n, PRIMITIVE_RULES[n]);
}
return 'object';
return this._addRule(ruleName, 'object');
} else {
if (!(schemaType in PRIMITIVE_RULES)) {
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);

View file

@ -68,7 +68,7 @@ static string read(const string& file) {
}
static void test_all(const string& lang, std::function<void(const TestCase&)> runner) {
cerr << "Testing JSON schema conversion (" << lang.c_str() << ")" << endl;
cerr << "#\n# Testing JSON schema conversion (" << lang.c_str() << ")\n#" << endl;
auto test = [&](const TestCase& tc) {
cerr << "- " << tc.name.c_str() << (tc.expected_status == FAILURE ? " (failure expected)" : "") << endl;
runner(tc);
@ -92,6 +92,25 @@ static void test_all(const string& lang, std::function<void(const TestCase&)> ru
""
});
test({
SUCCESS,
"empty schema (object)",
"{}",
R"""(
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
null ::= "null" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= " "?
string ::= "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)"""
});
test({
SUCCESS,
"exotic formats",