diff --git a/common/sampling.cpp b/common/sampling.cpp index df2d1958c..18d3be845 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -132,7 +132,7 @@ void gpt_sampler_accept(struct gpt_sampler * gsmpl, llama_token token, bool appl llama_sampler_accept(gsmpl->smpl, token); } -void gpt_sampler_reset (struct gpt_sampler * gsmpl) { +void gpt_sampler_reset(struct gpt_sampler * gsmpl) { llama_constraint_reset(gsmpl->grmr); llama_sampler_reset(gsmpl->smpl); diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 0b49b2b06..5cd14c49d 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -37,9 +37,6 @@ int main(int argc, char ** argv) { return 1; } - // for probabilities to be computed even with temp = 0 - params.sparams.n_probs = 16; - // max number of parallel drafting sequences (i.e. tree branches) const int n_seq_dft = params.n_parallel; diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index dfd618c33..4e44ec417 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -855,6 +855,8 @@ struct llama_constraint * llama_constraint_init_mirostat_v2_impl(float tau, floa // grammar struct llama_constraint_context_grammar { + const struct llama_vocab * vocab; + std::string grammar_str; std::string grammar_root; @@ -889,7 +891,7 @@ static struct llama_constraint_i llama_constraint_grammar_i = { /* .copy = */ [](const struct llama_constraint * cnstr) { const auto * ctx_src = (const llama_constraint_context_grammar *) cnstr->ctx; - auto * result = llama_constraint_init_grammar_impl(*ctx_src->grammar->vocab, nullptr, nullptr); + auto * result = llama_constraint_init_grammar_impl(*ctx_src->vocab, nullptr, nullptr); auto * ctx_dst = (llama_constraint_context_grammar *) result->ctx; if (ctx_src->grammar) { @@ -917,12 +919,14 @@ struct llama_constraint * llama_constraint_init_grammar_impl(const struct llama_ if (grammar_str != nullptr && grammar_str[0] != '\0') { *ctx = { + /*.vocab = */ &vocab, /*.grammar_str = */ grammar_str, /*.grammar_root = */ grammar_root, /*.grammar = */ llama_grammar_init_impl(&vocab, grammar_str, grammar_root), }; } else { *ctx = { + /*.vocab = */ &vocab, /*.grammar_str = */ {}, /*.grammar_root = */ {}, /*.grammar = */ nullptr,