diff --git a/Makefile b/Makefile index 22778bf1f..4fbbf758e 100644 --- a/Makefile +++ b/Makefile @@ -862,7 +862,7 @@ tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp +tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.cpp examples/server/json-schema-to-grammar.h $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) diff --git a/examples/server/json-schema-to-grammar.cpp b/examples/server/json-schema-to-grammar.cpp index 79046ffa7..9481f513c 100644 --- a/examples/server/json-schema-to-grammar.cpp +++ b/examples/server/json-schema-to-grammar.cpp @@ -33,6 +33,7 @@ unordered_map PRIMITIVE_RULES = { " )* \"\\\"\" space"}, {"null", "\"null\" space"} }; +vector OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null"}; unordered_map DATE_RULES = { {"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( [0-2] [0-9] | \"3\" [0-1] )"}, @@ -663,10 +664,10 @@ public: } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { return _visit_pattern(schema["pattern"], rule_name); } else if (schema.empty() || (schema.size() == 1 && schema_type == "object")) { - for (const auto& kv : PRIMITIVE_RULES) { - _add_rule(kv.first, kv.second); + for (const auto& n : OBJECT_RULE_NAMES) { + _add_rule(n, PRIMITIVE_RULES.at(n)); } - return "object"; + return _add_rule(rule_name, "object"); } else if ((schema_type.is_null() || schema_type == "string") && regex_match(schema_format, regex("^uuid[1-5]?$"))) { return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); } else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) { diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 1db99de6c..67354a295 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -15,6 +15,7 @@ const PRIMITIVE_RULES = { )* "\\"" space`, null: '"null" space', }; +const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null']; // TODO: support "uri", "email" string formats const DATE_RULES = { @@ -426,12 +427,11 @@ export class SchemaConverter { this._addRule(t, r); } return schemaFormat + '-string'; - } else if (schemaType === 'object' && Object.keys(schema).length === 1 || schemaType === undefined && Object.keys(schema).length === 0) { - // This depends on all primitive types - for (const [t, r] of Object.entries(PRIMITIVE_RULES)) { - this._addRule(t, r); + } else if ((schemaType === 'object' && Object.keys(schema).length === 1) || (Object.keys(schema).length === 0)) { + for (const n of OBJECT_RULE_NAMES) { + this._addRule(n, PRIMITIVE_RULES[n]); } - return 'object'; + return this._addRule(ruleName, 'object'); } else { if (!(schemaType in PRIMITIVE_RULES)) { throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`); diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 450d0233b..e0a80ea6c 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -68,7 +68,7 @@ static string read(const string& file) { } static void test_all(const string& lang, std::function runner) { - cerr << "Testing JSON schema conversion (" << lang.c_str() << ")" << endl; + cerr << "#\n# Testing JSON schema conversion (" << lang.c_str() << ")\n#" << endl; auto test = [&](const TestCase& tc) { cerr << "- " << tc.name.c_str() << (tc.expected_status == FAILURE ? " (failure expected)" : "") << endl; runner(tc); @@ -92,6 +92,25 @@ static void test_all(const string& lang, std::function ru "" }); + test({ + SUCCESS, + "empty schema (object)", + "{}", + R"""( + array ::= "[" space ( value ("," space value)* )? "]" space + boolean ::= ("true" | "false") space + null ::= "null" space + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space + root ::= object + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + test({ SUCCESS, "exotic formats",