diff --git a/common/sampling.cpp b/common/sampling.cpp index e4b21ca10..1ca26f1e3 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -151,12 +151,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co lparams.no_perf = params.no_perf; - std::vector trigger_words; - trigger_words.reserve(params.grammar_trigger_words.size()); - for (const auto & str : params.grammar_trigger_words) { - trigger_words.push_back(str.word.c_str()); - } - struct llama_sampler * grmr; if (params.grammar.compare(0, 11, "%llguidance") == 0) { #ifdef LLAMA_USE_LLGUIDANCE @@ -165,6 +159,12 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); #endif // LLAMA_USE_LLGUIDANCE } else { + std::vector trigger_words; + trigger_words.reserve(params.grammar_trigger_words.size()); + for (const auto & str : params.grammar_trigger_words) { + trigger_words.push_back(str.word.c_str()); + } + grmr = params.grammar_lazy ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root", trigger_words.data(), trigger_words.size(),