From a9335a5c2a24a22e7c2acc804e6b14ba7385dc0d Mon Sep 17 00:00:00 2001 From: Minsoo Cheong Date: Thu, 22 Feb 2024 13:50:30 +0900 Subject: [PATCH] sample from residual distribution on draft accept failure --- examples/speculative/speculative.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp index 20938cb7d..74d883410 100644 --- a/examples/speculative/speculative.cpp +++ b/examples/speculative/speculative.cpp @@ -298,12 +298,12 @@ int main(int argc, char ** argv) { if (!accept) { // all drafted tokens were rejected // sample from the target model - token_id = llama_sampling_sample(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]); + LOG("all drafted tokens were rejected, sampling from residual distribution\n"); + token_id = llama_sample_token(ctx_tgt, &dist_tgt); llama_sampling_accept(ctx_sampling, ctx_tgt, token_id, true); token_str = llama_token_to_piece(ctx_tgt, token_id); } - } else { // greedy verification