Add additional examples as documented in #7789. Also add the ability to automatically write improperly failing grammars and their test strings to debug output files, so they can be examined more easily with the gbnf-validator program.

This commit is contained in:
Clint Herron 2024-06-05 22:29:25 -07:00
parent 2b174dd9c5
commit 74985def80

View file

@ -86,6 +86,23 @@ static void test_grammar(const std::string & test_desc, const std::string & gram
if (!matched) {
    fprintf(stderr, "❌ (failed to match)\n");

    // DEBUG: Dump the grammar and the offending test string to files so the failure can be
    // replayed with the gbnf-validator program to see exactly where matching stopped.
    // Returns true only if the file could be opened, fully written and closed without error.
    const auto dump_to_file = [](const char * path, const std::string & contents) {
        FILE * out = fopen(path, "w");
        if (!out) {
            return false;
        }
        // Write with an explicit length so an embedded NUL byte does not truncate the dump.
        const bool wrote_all = fwrite(contents.data(), 1, contents.size(), out) == contents.size();
        const bool closed_ok = fclose(out) == 0;
        return wrote_all && closed_ok;
    };

    // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
    const bool grammar_dumped = dump_to_file("test-grammar-integration.grammar.gbnf", grammar_str);
    // DEBUG: Write the test string to test-grammar-integration.string.txt
    const bool string_dumped = dump_to_file("test-grammar-integration.string.txt", test_string);

    if (grammar_dumped && string_dumped) {
        fprintf(stderr, " Analyze in detail by running: `./gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt`\n");
    } else {
        // Do not point the user at files that are missing or left over from an earlier failure.
        fprintf(stderr, " (could not write the debug files for gbnf-validator)\n");
    }
} else {
    fprintf(stdout, "✅︎\n");
}
@ -475,7 +492,7 @@ static void test_json_schema() {
// Otherwise, this test structure is the same.
test_grammar(
"empty schema",
"empty schema (object)",
// Grammar
json_schema_to_grammar(nlohmann::ordered_json::parse(
R"""(
@ -492,6 +509,536 @@ static void test_json_schema() {
"",
}
);
{
    // Schema: an "items" list whose entries each name a string "format".
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"items": [
{ "format": "date" },
{ "format": "uuid" },
{ "format": "time" },
{ "format": "date-time" }
]
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "exotic formats (list)",
        grammar,
        // Inputs the grammar is expected to accept.
        // NOTE: The disabled candidates below all pass for this schema on
        // https://www.jsonschemavalidator.net/ -- whether they should pass here is an open question:
        //   "{}"
        //   "[]"
        //   R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])"""
        //   R"""({"foo": "bar"})"""
        {
            R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
        },
        // Inputs the grammar is expected to reject
        {
            R"""(["foo", "bar"])""",
            R"""(["12345678-1234-1234-1234-1234567890ab"])""",
        }
    );
}
{
    // Schema: a JSON string with no further constraints.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "string"
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string",
        grammar,
        // Inputs the grammar is expected to accept (including the empty string)
        {
            R"""("foo")""",
            R"""("bar")""",
            R"""("")""",
        },
        // Inputs the grammar is expected to reject
        {
            "{}",
            R"""("foo": "bar")""",
        }
    );
}
{
    // Schema: a JSON string holding at least one character.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "string",
"minLength": 1
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string w/ min length 1",
        grammar,
        // Inputs the grammar is expected to accept
        {
            R"""("foo")""",
            R"""("bar")""",
        },
        // Inputs the grammar is expected to reject (the empty string is too short)
        {
            R"""("")""",
            "{}",
            R"""("foo": "bar")""",
        }
    );
}
{
    // Schema: a JSON string holding at least three characters.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "string",
"minLength": 3
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string w/ min length 3",
        grammar,
        // Inputs the grammar is expected to accept (three characters or more)
        {
            R"""("foo")""",
            R"""("bar")""",
            R"""("foobar")""",
        },
        // Inputs the grammar is expected to reject (zero, one and two characters)
        {
            R"""("")""",
            R"""("f")""",
            R"""("fo")""",
        }
    );
}
{
    // Schema: a JSON string holding at most three characters.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "string",
"maxLength": 3
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string w/ max length",
        grammar,
        // Inputs the grammar is expected to accept (zero to three characters)
        {
            R"""("foo")""",
            R"""("bar")""",
            R"""("")""",
            R"""("f")""",
            R"""("fo")""",
        },
        // Inputs the grammar is expected to reject (six characters)
        {
            R"""("foobar")""",
        }
    );
}
{
    // Schema: a JSON string holding between one and four characters.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "string",
"minLength": 1,
"maxLength": 4
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string w/ min & max length",
        grammar,
        // Inputs the grammar is expected to accept (one to four characters)
        {
            R"""("foo")""",
            R"""("bar")""",
            R"""("f")""",
            R"""("barf")""",
        },
        // Inputs the grammar is expected to reject (empty, five and six characters)
        {
            R"""("")""",
            R"""("barfo")""",
            R"""("foobar")""",
        }
    );
}
{
    // Schema: a JSON boolean.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "boolean"
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "boolean",
        grammar,
        // Inputs the grammar is expected to accept
        {
            "true",
            "false",
        },
        // Inputs the grammar is expected to reject (quoted values and non-lowercase spellings)
        {
            R"""("")""",
            R"""("true")""",
            "True",
            "FALSE",
        }
    );
}
{
    // Schema: a JSON integer.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "integer"
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "integer",
        grammar,
        // Inputs the grammar is expected to accept (the longest one has 16 digits)
        {
            "0",
            "12345",
            "1234567890123456"
        },
        // Inputs the grammar is expected to reject (empty, leading zeros, and a 17-digit value)
        {
            "",
            "01",
            "007",
            "12345678901234567"
        }
    );
}
{
    // Schema: the single constant string value "foo".
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"const": "foo"
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "string const",
        grammar,
        // Inputs the grammar is expected to accept
        {
            R"""("foo")""",
        },
        // Inputs the grammar is expected to reject (unquoted value, different string)
        {
            "foo",
            R"""("bar")""",
        }
    );
}
{
    // Schema: the single constant boolean value true.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"const": true
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "non-string const",
        grammar,
        // Inputs the grammar is expected to accept
        {
            "true",
        },
        // Inputs the grammar is expected to reject (empty, unrelated token, quoted value)
        {
            "",
            "foo",
            R"""("true")""",
        }
    );
}
// Enumerated values of mixed JSON types (strings, null, a number and an array).
test_grammar(
    // Named "enum" so failures are distinguishable from the "non-string const" test.
    "enum",
    // Grammar
    json_schema_to_grammar(nlohmann::ordered_json::parse(
        R"""(
{
"enum": ["red", "amber", "green", null, 42, ["foo"]]
}
)"""
    )),
    // Passing strings: each is exactly one of the enumerated values
    {
        "\"red\"",
        "null",
        "42",
        "[\"foo\"]",
    },
    // Failing strings: none of these is a member of the enum
    {
        "",
        "420",
        "true",
        "foo",
    }
);
{
    // Schema: items typed as number/integer, with between three and five items.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"items": {
"type": ["number", "integer"]
},
"minItems": 3,
"maxItems": 5
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "min+max items",
        grammar,
        // Inputs the grammar is expected to accept (three, four and five items)
        {
            "[1, 2, 3]",
            "[1, 2, 3, 4]",
            "[1, 2, 3, 4, 5]",
        },
        // Inputs the grammar is expected to reject (two items, six items, a bare number)
        {
            "[1, 2]",
            "[1, 2, 3, 4, 5, 6]",
            "1"
        }
    );
}
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
test_grammar(
    "object properties",
    // Grammar
    json_schema_to_grammar(nlohmann::ordered_json::parse(
        R"""(
{
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
}
}
)"""
    )),
    // Passing strings
    {
        R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
        // "By default, leaving out properties is valid"
        R"""({ "street_name": "Pennsylvania" })""",
        R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
        // "By extension, even an empty object is valid"
        R"""({})""",
        // "By default, providing additional properties is valid"
        // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
        // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
        // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
        // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
    },
    // Failing strings
    {
        // Change datatype from number to string
        R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
        // Reorder properties (two of the three present)
        R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
        // Reorder properties (all three present, "number" moved after "street_name")
        R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
    }
);
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
{
    // Schema: the same three address properties, with additionalProperties explicitly set to true.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
},
"additionalProperties": true
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "object properties, additionalProperties: true",
        grammar,
        // Inputs the grammar is expected to accept
        {
            // Disabled candidates (no reason is recorded for these three):
            //R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
            // "By default, leaving out properties is valid"
            //R"""({ "street_name": "Pennsylvania" })""",
            //R"""({ "number": 1600, "street_name": "Pennsylvania" })""",

            // "By extension, even an empty object is valid"
            R"""({})""",

            // "By default, providing additional properties is valid"
            // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
            //R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
            // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
            // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
        },
        // Inputs the grammar is expected to reject
        {
            // "number" given as a string instead of a number
            R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
            // Properties out of their declared order
            R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
        }
    );
}
// Additional properties: false
test_grammar(
    // Named after what the schema exercises; it has no "required" list, and the
    // "required + optional props" description belongs to a different test case.
    "object properties, additionalProperties: false",
    // Grammar
    json_schema_to_grammar(nlohmann::ordered_json::parse(
        R"""(
{
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
},
"additionalProperties": false
}
)"""
    )),
    // Passing strings: any subset of the declared properties, in declared order
    {
        R"""({ "street_name": "Pennsylvania" })""",
        R"""({ "number": 1600, "street_type":"Avenue"})""",
        R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
        R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
        // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
        // R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
    },
    // Failing strings
    {
        // Reorder properties
        R"""({ "street_type": "Avenue", "number": 1600 })""",
        // Add "direction"
        R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
    }
);
{
    // Schema: properties declared in the order b, a, d, c; "a" and "b" are required
    // and no additional properties are allowed.
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"properties": {
"b": {"type": "string"},
"a": {"type": "string"},
"d": {"type": "string"},
"c": {"type": "string"}
},
"required": ["a", "b"],
"additionalProperties": false
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "required + optional props each in original order",
        grammar,
        // Inputs the grammar is expected to accept (keys follow the declared order)
        {
            R"""({"b": "foo", "a": "bar"})""",
            R"""({"b":"foo","a":"bar","d":"qux"})""",
            R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
        },
        // Inputs the grammar is expected to reject
        {
            // "a" placed before "b"
            R"""({"a": "foo", "b": "bar"})""",
            // "a" is absent
            R"""({"b": "bar"})""",
            // "b" is absent
            R"""({"a": "foo", "c": "baz"})""",
            // "a" before "b" and "c" before "d"
            R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
        }
    );
}
// NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
{
    // Schema: a product object with three required properties (productId, productName, price)
    // plus optional "tags" and "dimensions".
    const auto schema = nlohmann::ordered_json::parse(R"""(
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://example.com/product.schema.json",
"title": "Product",
"description": "A product from Acme's catalog",
"type": "object",
"properties": {
"productId": {
"description": "The unique identifier for a product",
"type": "integer"
},
"productName": {
"description": "Name of the product",
"type": "string"
},
"price": {
"description": "The price of the product",
"type": "number",
"exclusiveMinimum": 0
},
"tags": {
"description": "Tags for the product",
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": true
},
"dimensions": {
"type": "object",
"properties": {
"length": {
"type": "number"
},
"width": {
"type": "number"
},
"height": {
"type": "number"
}
},
"required": [ "length", "width", "height" ]
}
},
"required": [ "productId", "productName", "price" ]
}
)""");
    const auto grammar = json_schema_to_grammar(schema);

    test_grammar(
        "required props",
        grammar,
        // Inputs the grammar is expected to accept
        {
            R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
        },
        // Inputs the grammar is expected to reject
        {
            // Missing all required properties
            "{}",
            // Out of order properties
            R"""({"productName": "A green door", "price": 12.50, "productId": 1})""",
            // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
            // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
            // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
            // Missing required property (price)
            R"""({"productId": 1, "productName": "A green door"})""",
            // Missing required property (productId)
            R"""({"productName": "A green door", "price": 12.50})""",
            // tags is empty, but minItems is 1
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""",
            // Tags and dimensions are out of order
            R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""",
            // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
            // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
        }
    );
}
}
int main() {