diff --git a/common/sampling.cpp b/common/sampling.cpp index 516868079..45d68b26c 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -174,7 +174,7 @@ static llama_token llama_sampling_sample_impl( const float mirostat_eta = params.mirostat_eta; std::vector original_logits; - auto cur_p = llama_sampling_configure_token_candidates(ctx_sampling, ctx_main, ctx_cfg, idx, !is_resampling, &original_logits); + auto cur_p = llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, !is_resampling, &original_logits); if (!is_resampling) { GGML_ASSERT(!original_logits.empty()); } @@ -245,7 +245,7 @@ static llama_token llama_sampling_sample_impl( return id; } -static llama_token_data_array llama_sampling_configure_token_candidates_impl( +static llama_token_data_array llama_sampling_prepare_impl( struct llama_sampling_context * ctx_sampling, struct llama_context * ctx_main, struct llama_context * ctx_cfg, @@ -329,14 +329,14 @@ llama_token llama_sampling_sample( return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, false); } -llama_token_data_array llama_sampling_configure_token_candidates( +llama_token_data_array llama_sampling_prepare( struct llama_sampling_context * ctx_sampling, struct llama_context * ctx_main, struct llama_context * ctx_cfg, const int idx, bool apply_grammar, std::vector * original_logits) { - return llama_sampling_configure_token_candidates_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits); + return llama_sampling_prepare_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits); } void llama_sampling_accept( diff --git a/common/sampling.h b/common/sampling.h index 89a6bdf91..56ed991b8 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -132,7 +132,7 @@ llama_token llama_sampling_sample( int idx = 0); // Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters. -llama_token_data_array llama_sampling_configure_token_candidates( +llama_token_data_array llama_sampling_prepare( struct llama_sampling_context * ctx_sampling, struct llama_context * ctx_main, struct llama_context * ctx_cfg, diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 952362770..8b31b678a 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -219,7 +219,7 @@ int main(int argc, char ** argv) { if (params.sparams.temp > 0) { // stochastic verification - llama_token_data_array dist_tgt = llama_sampling_configure_token_candidates(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL); + llama_token_data_array dist_tgt = llama_sampling_prepare(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL); llama_sample_softmax(ctx_tgt, &dist_tgt); float p_tgt = 0, p_dft = 0; diff --git a/retrieval b/retrieval new file mode 100755 index 000000000..dd31789f8 Binary files /dev/null and b/retrieval differ