grammars: more test cases

This commit is contained in:
Olivier Chafik 2024-04-12 19:10:57 +01:00
parent ec913426be
commit 22faba62ff
2 changed files with 77 additions and 1 deletions

View file

@ -172,9 +172,10 @@ namespace grammar_parser {
// S{m,} --> S' ::= Scopy Scopy Scopy (m times) Sstar // S{m,} --> S' ::= Scopy Scopy Scopy (m times) Sstar
// Scopy ::= S // Scopy ::= S
// Sstar ::= Scopy Sstar | // Sstar ::= Scopy Sstar |
// If S is already a reference to a rule, the Scopy indirection is skipped and S is referenced directly
uint32_t content_rule_id = 0; uint32_t content_rule_id = 0;
if (last_sym_start >= 0 && last_sym_start == out_elements.size() - 1 && out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { if (last_sym_start == out_elements.size() - 1 && out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) {
// The repeated content is already a rule ref, no need to copy it // The repeated content is already a rule ref, no need to copy it
content_rule_id = out_elements[last_sym_start].value; content_rule_id = out_elements[last_sym_start].value;
} else { } else {

View file

@ -173,6 +173,32 @@ int main()
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
verify_parsing(R"""(
root ::= a+
a ::= "a"
)""", {
{"a", 1},
{"root", 0},
{"root_2", 2},
{"root_star_3", 3},
}, {
// root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
{LLAMA_GRETYPE_END, 0},
// a (index 1)
{LLAMA_GRETYPE_CHAR, 'a'},
{LLAMA_GRETYPE_END, 0},
// root_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* a */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_END, 0},
// root_star_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* a */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
});
verify_parsing(R"""( verify_parsing(R"""(
root ::= "a"+ root ::= "a"+
)""", { )""", {
@ -198,6 +224,30 @@ int main()
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
verify_parsing(R"""(
root ::= a?
a ::= "a"
)""", {
{"a", 1},
{"root", 0},
{"root_1_3", 3},
{"root_2", 2},
}, {
// root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
{LLAMA_GRETYPE_END, 0},
// a (index 1)
{LLAMA_GRETYPE_CHAR, 'a'},
{LLAMA_GRETYPE_END, 0},
// root_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 3},
{LLAMA_GRETYPE_END, 0},
// root_1_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* a */ 1},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
});
verify_parsing(R"""( verify_parsing(R"""(
root ::= "a"? root ::= "a"?
)""", { )""", {
@ -221,6 +271,31 @@ int main()
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
verify_parsing(R"""(
root ::= a*
a ::= "a"
)""", {
{"a", 1},
{"root", 0},
{"root_2", 2},
{"root_star_3", 3},
}, {
// root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
{LLAMA_GRETYPE_END, 0},
// a (index 1)
{LLAMA_GRETYPE_CHAR, 'a'},
{LLAMA_GRETYPE_END, 0},
// root_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_END, 0},
// root_star_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* a */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
});
verify_parsing(R"""( verify_parsing(R"""(
root ::= "a"* root ::= "a"*
)""", { )""", {