grammars: fix copy rule skipping

This commit is contained in:
Olivier Chafik 2024-04-12 17:58:16 +01:00
parent ffe321d01e
commit a9351b8f75
2 changed files with 44 additions and 72 deletions

View file

@ -174,7 +174,7 @@ namespace grammar_parser {
// Sstar ::= Scopy Sstar | // Sstar ::= Scopy Sstar |
uint32_t content_rule_id = 0; uint32_t content_rule_id = 0;
if (out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { if (last_sym_start == out_elements.size() - 1) {
// The repeated content is already a rule ref, no need to copy it // The repeated content is already a rule ref, no need to copy it
content_rule_id = out_elements[last_sym_start].value; content_rule_id = out_elements[last_sym_start].value;
} else { } else {

View file

@ -153,22 +153,18 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_2", 2}, {"root_star_2", 2},
{"root_star_3", 3},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_2 (index 2) // root_star_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2},
{LLAMA_GRETYPE_END, 0},
// root_star_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -178,20 +174,16 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_1_3", 3}, {"root_1_2", 2},
{"root_2", 2},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* root_1_2 */ 2},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1_3 (index 2) // root_1_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_END, 0},
// root_2 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -201,21 +193,17 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_2", 2}, {"root_star_2", 2},
{"root_star_3", 3},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_2 (index 2) // root_star_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2},
// root_star_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -225,17 +213,13 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_2", 2},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
// root_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -244,23 +228,19 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_2", 2}, {"root_star_2", 2},
{"root_star_3", 3},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_2 (index 2) // root_2 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_END, 0},
// root_star_3 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -270,19 +250,15 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_2", 2},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
// root_2 (index 2) {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });
@ -291,28 +267,24 @@ int main()
)""", { )""", {
{"root", 0}, {"root", 0},
{"root_1", 1}, {"root_1", 1},
{"root_1_3", 3}, {"root_1_2", 2},
{"root_2", 2}, {"root_2_3", 3},
{"root_2_4", 4},
}, { }, {
// root (index 0) // root (index 0)
{LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2}, {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1 (index 1) // root_1 (index 1)
{LLAMA_GRETYPE_CHAR, 'a'}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_1_3 (index 2) // root_1_3 (index 2)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_RULE_REF, /* root_2_4 */ 4},
{LLAMA_GRETYPE_END, 0},
// root_2 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
// root_2_4 (index 4) // root_2 (index 3)
{LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_RULE_REF, /* */ 97},
{LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3}, {LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2},
{LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0}, {LLAMA_GRETYPE_END, 0},
}); });