diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index 666ba4125..317b1ca20 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -172,9 +172,10 @@ namespace grammar_parser { // S{m,} --> S' ::= Scopy Scopy Scopy (m times) Sstar // Scopy ::= S // Sstar ::= Scopy Sstar | + // And if S is a reference to a rule, then we skip the S_copy indirection uint32_t content_rule_id = 0; - if (last_sym_start >= 0 && last_sym_start == out_elements.size() - 1 && out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { + if (last_sym_start == out_elements.size() - 1 && out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { // The repeated content is already a rule ref, no need to copy it content_rule_id = out_elements[last_sym_start].value; } else { diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index cf2a92aad..847b8380a 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -173,6 +173,32 @@ int main() {LLAMA_GRETYPE_END, 0}, }); + verify_parsing(R"""( + root ::= a+ + a ::= "a" + )""", { + {"a", 1}, + {"root", 0}, + {"root_2", 2}, + {"root_star_3", 3}, + }, { + // root (index 0) + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_END, 0}, + // a (index 1) + {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_END, 0}, + // root_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* a */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + {LLAMA_GRETYPE_END, 0}, + // root_star_3 (index 3) + {LLAMA_GRETYPE_RULE_REF, /* a */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + {LLAMA_GRETYPE_ALT, 0}, + {LLAMA_GRETYPE_END, 0}, + }); + verify_parsing(R"""( root ::= "a"+ )""", { @@ -198,6 +224,30 @@ int main() {LLAMA_GRETYPE_END, 0}, }); + verify_parsing(R"""( + root ::= a? + a ::= "a" + )""", { + {"a", 1}, + {"root", 0}, + {"root_1_3", 3}, + {"root_2", 2}, + }, { + // root (index 0) + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_END, 0}, + // a (index 1) + {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_END, 0}, + // root_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* root_1_3 */ 3}, + {LLAMA_GRETYPE_END, 0}, + // root_1_3 (index 3) + {LLAMA_GRETYPE_RULE_REF, /* a */ 1}, + {LLAMA_GRETYPE_ALT, 0}, + {LLAMA_GRETYPE_END, 0}, + }); + verify_parsing(R"""( root ::= "a"? )""", { @@ -221,6 +271,31 @@ int main() {LLAMA_GRETYPE_END, 0}, }); + verify_parsing(R"""( + root ::= a* + a ::= "a" + )""", { + {"a", 1}, + {"root", 0}, + {"root_2", 2}, + {"root_star_3", 3}, + }, { + // root (index 0) + {LLAMA_GRETYPE_RULE_REF, /* root_2 */ 2}, + {LLAMA_GRETYPE_END, 0}, + // a (index 1) + {LLAMA_GRETYPE_CHAR, 'a'}, + {LLAMA_GRETYPE_END, 0}, + // root_2 (index 2) + {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + {LLAMA_GRETYPE_END, 0}, + // root_star_3 (index 3) + {LLAMA_GRETYPE_RULE_REF, /* a */ 1}, + {LLAMA_GRETYPE_RULE_REF, /* root_star_3 */ 3}, + {LLAMA_GRETYPE_ALT, 0}, + {LLAMA_GRETYPE_END, 0}, + }); + verify_parsing(R"""( root ::= "a"* )""", {