From a9351b8f757f66cbfc85b812658baf273c338aab Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 12 Apr 2024 17:58:16 +0100 Subject: [PATCH] grammars: fix copy rule skipping --- common/grammar-parser.cpp | 2 +- tests/test-grammar-parser.cpp | 114 +++++++++++++--------------------- 2 files changed, 44 insertions(+), 72 deletions(-) diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index 2b012d037..fde9d6981 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -174,7 +174,7 @@ namespace grammar_parser { // Sstar ::= Scopy Sstar | uint32_t content_rule_id = 0; - if (out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { + if (last_sym_start == out_elements.size() - 1) { // The repeated content is already a rule ref, no need to copy it content_rule_id = out_elements[last_sym_start].value; } else { diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index 4add713ac..a8fbe64e7 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -153,22 +153,18 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_2", 2}, - {"root_star_3", 3}, + {"root_star_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2}, {LLAMA_GRETYPE_END, 0}, - // root_2 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, - {LLAMA_GRETYPE_END, 0}, - // root_star_3 (index 3) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + // root_star_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, }); @@ -178,20 +174,16 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_1_3", 3}, - {"root_2", 2}, + {"root_1_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_RULE_REF, /* root_1_2 */ 2}, {LLAMA_GRETYPE_END, 0}, - // root_1_3 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3}, - {LLAMA_GRETYPE_END, 0}, - // root_2 (index 3) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, + // root_1_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, }); @@ -201,21 +193,17 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_2", 2}, - {"root_star_3", 3}, + {"root_star_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2}, {LLAMA_GRETYPE_END, 0}, - // root_2 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, - {LLAMA_GRETYPE_END, 0}, - // root_star_3 (index 3) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + // root_star_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_2 */ 2}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, }); @@ -225,17 +213,13 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, - {LLAMA_GRETYPE_END, 0}, - // root_2 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, {LLAMA_GRETYPE_END, 0}, }); @@ -244,23 +228,19 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_2", 2}, - {"root_star_3", 3}, + {"root_star_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_END, 0}, // root_2 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, - {LLAMA_GRETYPE_END, 0}, - // root_star_3 (index 3) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, }); @@ -270,19 +250,15 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_2", 2}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, - {LLAMA_GRETYPE_END, 0}, - // root_2 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, {LLAMA_GRETYPE_END, 0}, }); @@ -291,28 +267,24 @@ int main() )""", { {"root", 0}, {"root_1", 1}, - {"root_1_3", 3}, - {"root_2", 2}, - {"root_2_4", 4}, + {"root_1_2", 2}, + {"root_2_3", 3}, }, { // root (index 0) - {LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2}, + {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, {LLAMA_GRETYPE_END, 0}, // root_1 (index 1) - {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3}, {LLAMA_GRETYPE_END, 0}, // root_1_3 (index 2) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_2_4 */ 4}, - {LLAMA_GRETYPE_END, 0}, - // root_2 (index 3) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, - // root_2_4 (index 4) - {LLAMA_GRETYPE_RULE_REF, /* root_1 */ 1}, - {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 3}, + // root_2 (index 3) + {LLAMA_GRETYPE_RULE_REF, /* */ 97}, + {LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 2}, {LLAMA_GRETYPE_ALT, 0}, {LLAMA_GRETYPE_END, 0}, });