diff --git a/common/sampling.cpp b/common/sampling.cpp
index 516868079..45d68b26c 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -174,7 +174,7 @@ static llama_token llama_sampling_sample_impl(
     const float   mirostat_eta    = params.mirostat_eta;
 
     std::vector<float> original_logits;
-    auto cur_p = llama_sampling_configure_token_candidates(ctx_sampling, ctx_main, ctx_cfg, idx, !is_resampling, &original_logits);
+    auto cur_p = llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, !is_resampling, &original_logits);
     if (!is_resampling) {
         GGML_ASSERT(!original_logits.empty());
     }
@@ -245,7 +245,7 @@ static llama_token llama_sampling_sample_impl(
     return id;
 }
 
-static llama_token_data_array llama_sampling_configure_token_candidates_impl(
+static llama_token_data_array llama_sampling_prepare_impl(
                   struct llama_sampling_context * ctx_sampling,
                   struct llama_context * ctx_main,
                   struct llama_context * ctx_cfg,
@@ -329,14 +329,14 @@ llama_token llama_sampling_sample(
     return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, false);
 }
 
-llama_token_data_array llama_sampling_configure_token_candidates(
+llama_token_data_array llama_sampling_prepare(
                   struct llama_sampling_context * ctx_sampling,
                   struct llama_context * ctx_main,
                   struct llama_context * ctx_cfg,
                   const int idx,
                   bool apply_grammar,
                   std::vector<float> * original_logits) {
-    return llama_sampling_configure_token_candidates_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits);
+    return llama_sampling_prepare_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits);
 }
 
 void llama_sampling_accept(
diff --git a/common/sampling.h b/common/sampling.h
index 89a6bdf91..56ed991b8 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -132,7 +132,7 @@ llama_token llama_sampling_sample(
         int idx = 0);
 
 // Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
-llama_token_data_array llama_sampling_configure_token_candidates(
+llama_token_data_array llama_sampling_prepare(
         struct llama_sampling_context * ctx_sampling,
         struct llama_context * ctx_main,
         struct llama_context * ctx_cfg,
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 952362770..8b31b678a 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -219,7 +219,7 @@ int main(int argc, char ** argv) {
                 if (params.sparams.temp > 0) {
                     // stochastic verification
 
-                    llama_token_data_array dist_tgt = llama_sampling_configure_token_candidates(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL);
+                    llama_token_data_array dist_tgt = llama_sampling_prepare(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL);
                     llama_sample_softmax(ctx_tgt, &dist_tgt);
                     float p_tgt = 0, p_dft = 0;
 
diff --git a/retrieval b/retrieval
new file mode 100755
index 000000000..dd31789f8
Binary files /dev/null and b/retrieval differ