Separating test calls to give more helpful stack traces on failure. Adding verbose messages to give visibility for what is being tested.

This commit is contained in:
Clint Herron 2024-04-27 09:38:45 -04:00
parent 7b04c215eb
commit b7f24dba00

View file

@ -52,6 +52,49 @@ static bool match_string(const std::string & input, llama_grammar* grammar) {
return false;
}
static void test_grammar(const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
fprintf(stderr, "🟢 Testing grammar: %s\n", grammar_str.c_str());
auto grammar = build_grammar(grammar_str);
// Save the original grammar stacks so that we can reset after every new string we want to test
auto original_stacks = grammar->stacks;
// Passing strings
for (const auto & test_string : passing_strings) {
bool matched = match_string(test_string, grammar);
if (!matched) {
fprintf(stderr, " ❌ Failed to match string: %s\n", test_string.c_str());
} else {
fprintf(stdout, " ✅︎ Matched string: %s\n", test_string.c_str());
}
assert(matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
}
// Failing strings
for (const auto & test_string : failing_strings) {
bool matched = match_string(test_string, grammar);
if (matched) {
fprintf(stderr, " ❌ Improperly matched string: %s\n", test_string.c_str());
} else {
fprintf(stdout, " ✅︎ Correctly did not match string: %s\n", test_string.c_str());
}
assert(!matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
}
// Clean up allocated memory
llama_grammar_free(grammar);
}
static void test_simple_grammar() {
// Test case for a simple grammar
const std::string grammar_str = R"""(root ::= expr
@ -71,225 +114,151 @@ number ::= [0-9]+)""";
static void test_complex_grammar() {
// Test case for a more complex grammar, with both failure strings and success strings
const std::string grammar_str = R"""(root ::= expression
expression ::= term ws (("+"|"-") ws term)*
term ::= factor ws (("*"|"/") ws factor)*
factor ::= number | variable | "(" expression ")" | function-call
number ::= [0-9]+
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
ws ::= [ \t\n\r]?)""";
auto grammar = build_grammar(grammar_str);
// Save the original grammar stacks so that we can reset after every new string we want to test
auto original_stacks = grammar->stacks;
// Test a few strings
std::vector<std::string> test_strings_pass = {
"42",
"1*2*3*4*5",
"x",
"x+10",
"x1+y2",
"(a+b)*(c-d)",
"func()",
"func(x,y+2)",
"a*(b+c)-d/e",
"f(g(x),h(y,z))",
"x + 10",
"x1 + y2",
"(a + b) * (c - d)",
"func()",
"func(x, y + 2)",
"a * (b + c) - d / e",
"f(g(x), h(y, z))",
"123+456",
"123*456*789-123/456+789*123",
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
};
std::vector<std::string> test_strings_fail = {
"+",
"/ 3x",
"x + + y",
"a * / b",
"func(,)",
"func(x y)",
"(a + b",
"x + y)",
"a + b * (c - d",
"42 +",
"x +",
"x + 10 +",
"(a + b) * (c - d",
"func(",
"func(x, y + 2",
"a * (b + c) - d /",
"f(g(x), h(y, z)",
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
};
// Passing strings
for (const auto & test_string : test_strings_pass) {
bool matched = match_string(test_string, grammar);
assert(matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
}
// Failing strings
for (const auto & test_string : test_strings_fail) {
bool matched = match_string(test_string, grammar);
assert(!matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
}
// Clean up allocated memory
llama_grammar_free(grammar);
test_grammar(
// Grammar
R"""(
root ::= expression
expression ::= term ws (("+"|"-") ws term)*
term ::= factor ws (("*"|"/") ws factor)*
factor ::= number | variable | "(" expression ")" | function-call
number ::= [0-9]+
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
ws ::= [ \t\n\r]?)""",
// Passing strings
{
"42",
"1*2*3*4*5",
"x",
"x+10",
"x1+y2",
"(a+b)*(c-d)",
"func()",
"func(x,y+2)",
"a*(b+c)-d/e",
"f(g(x),h(y,z))",
"x + 10",
"x1 + y2",
"(a + b) * (c - d)",
"func()",
"func(x, y + 2)",
"a * (b + c) - d / e",
"f(g(x), h(y, z))",
"123+456",
"123*456*789-123/456+789*123",
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
},
// Failing strings
{
"+",
"/ 3x",
"x + + y",
"a * / b",
"func(,)",
"func(x y)",
"(a + b",
"x + y)",
"a + b * (c - d",
"42 +",
"x +",
"x + 10 +",
"(a + b) * (c - d",
"func(",
"func(x, y + 2",
"a * (b + c) - d /",
"f(g(x), h(y, z)",
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
}
);
}
static void test_quantifiers() {
// Populate test data with grammar strings and their associated collections of expected passing and failing strings
const std::vector<
std::tuple<
std::string,
std::vector<std::string>,
std::vector<std::string>>>
test_data = {
{
// Grammar
R"""(root ::= "a"*)""",
// Passing strings
{
"",
"a",
"aaaaa",
"aaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
},
// Failing strings
{
"b",
"ab",
"aab",
"ba",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
}
},
{
// Grammar
R"""(root ::= "a"+)""",
// Passing strings
{
"a",
"aaaaa",
"aaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
},
// Failing strings
{
"",
"b",
"ab",
"aab",
"ba",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
}
},
{
// Grammar
R"""(root ::= "a"?)""",
// Passing strings
{
"",
"a"
},
// Failing strings
{
"b",
"ab",
"aa",
"ba",
}
},
{
// Grammar
R"""(
root ::= cons+ vowel* cons? (vowel cons)*
vowel ::= [aeiouy]
cons ::= [bcdfghjklmnpqrstvwxyz]
)""",
// Passing strings
{
"yes",
"no",
"noyes",
"crwth",
"four",
"bryyyy",
},
// Failing strings
{
"yess",
"yesno",
"forty",
"catyyy",
}
}
};
for (const auto & test_datum : test_data) {
const auto & grammar_str = std::get<0>(test_datum);
const auto & passing_strings = std::get<1>(test_datum);
const auto & failing_strings = std::get<2>(test_datum);
auto grammar = build_grammar(grammar_str);
// Save the original grammar stacks so that we can reset after every new string we want to test
auto original_stacks = grammar->stacks;
// A collection of tests to exercise * + and ? quantifiers
test_grammar(
// Grammar
R"""(root ::= "a"*)""",
// Passing strings
for (const auto & test_string : passing_strings) {
bool matched = match_string(test_string, grammar);
if (!matched) {
fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str());
fprintf(stderr, "Failed to match string: %s\n", test_string.c_str());
}
assert(matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
}
{
"",
"a",
"aaaaa",
"aaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
},
// Failing strings
for (const auto & test_string : failing_strings) {
bool matched = match_string(test_string, grammar);
if (matched) {
fprintf(stderr, "Against grammar: %s\n", grammar_str.c_str());
fprintf(stderr, "Improperly matched string: %s\n", test_string.c_str());
}
assert(!matched);
// Reset the grammar stacks
grammar->stacks = original_stacks;
{
"b",
"ab",
"aab",
"ba",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
}
// Clean up allocated memory
llama_grammar_free(grammar);
}
);
test_grammar(
// Grammar
R"""(root ::= "a"+)""",
// Passing strings
{
"a",
"aaaaa",
"aaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
},
// Failing strings
{
"",
"b",
"ab",
"aab",
"ba",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
}
);
test_grammar(
// Grammar
R"""(root ::= "a"?)""",
// Passing strings
{
"",
"a"
},
// Failing strings
{
"b",
"ab",
"aa",
"ba",
}
);
test_grammar(
// Grammar
R"""(
root ::= cons+ vowel* cons? (vowel cons)*
vowel ::= [aeiouy]
cons ::= [bcdfghjklmnpqrstvwxyz]
)""",
// Passing strings
{
"yes",
"no",
"noyes",
"crwth",
"four",
"bryyyy",
},
// Failing strings
{
"yess",
"yesno",
"forty",
"catyyy",
}
);
}
static void test_failure_missing_root() {
fprintf(stderr, "🟢 Testing for missing root node:\n");
// Test case for a grammar that is missing a root rule
const std::string grammar_str = R"""(rot ::= expr
expr ::= term ("+" term)*
@ -303,26 +272,32 @@ number ::= [0-9]+)""";
// Ensure we do NOT have a root node
assert(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end());
fprintf(stderr, " ✅︎ Passed\n");
}
static void test_failure_missing_reference() {
fprintf(stderr, "🟢 Testing for missing reference node:\n");
// Test case for a grammar that is missing a referenced rule
const std::string grammar_str = R"""(root ::= expr
const std::string grammar_str =
R"""(root ::= expr
expr ::= term ("+" term)*
term ::= numero
number ::= [0-9]+)""";
fprintf(stderr, "Expected error: ");
fprintf(stderr, " Expected error: ");
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
// Ensure we did NOT parsed correctly
assert(parsed_grammar.rules.empty());
fprintf(stderr, "End of expected error.\n");
fprintf(stderr, " End of expected error.\n");
fprintf(stderr, " ✅︎ Passed\n");
}
int main() {
fprintf(stdout, "Running grammar integration tests...\n");
test_simple_grammar();
test_complex_grammar();
test_quantifiers();