diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index ba8b4ef9b..666ba4125 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -174,11 +174,11 @@ namespace grammar_parser { // Sstar ::= Scopy Sstar | uint32_t content_rule_id = 0; - if (last_sym_start == out_elements.size() - 1) { + if (last_sym_start >= 0 && last_sym_start == out_elements.size() - 1 && out_elements[last_sym_start].type == LLAMA_GRETYPE_RULE_REF) { // The repeated content is already a rule ref, no need to copy it content_rule_id = out_elements[last_sym_start].value; } else { - content_rule_id = generate_symbol_id(state, rule_name); + content_rule_id = generate_symbol_id(state, rule_name + "_copy"); // add preceding symbol to generated copy rule std::vector copy_rule(out_elements.begin() + last_sym_start, out_elements.end()); copy_rule.push_back({LLAMA_GRETYPE_END, 0}); diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index 545fa95ec..cf2a92aad 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -24,6 +24,11 @@ static void verify_parsing(const char *grammar_bytes, const std::vector symbol_names; + for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) { + symbol_names[it->second] = it->first; + } + auto print_all = [&]() { fprintf(stderr, " verify_parsing(R\"\"\"(%s)\"\"\", {\n", grammar_bytes); for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) { @@ -31,7 +36,7 @@ static void verify_parsing(const char *grammar_bytes, const std::vector