From e9e1c20c756c746452f4048bf77a8b0cc9063b44 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 24 Sep 2024 09:02:54 +0300
Subject: [PATCH] sampling : add clarifying comment [no ci]

---
 common/sampling.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/common/sampling.cpp b/common/sampling.cpp
index 345abd221..3dc7f1120 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -210,6 +210,11 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
         }
     } else {
         if (params.n_probs > 0) {
+            // some use cases require sampling greedily while still obtaining the probabilities of the top tokens
+            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
+            //
+            // the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
+            // it is much faster, since we avoid sorting all tokens, and should give a good approximation
            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
         }
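
Below is a minimal, standalone C++ sketch (not part of the patch; none of these names are llama.cpp APIs) illustrating the point made in the added comment: taking the softmax over only the top-k logits drops the tail mass from the normalization term, so the resulting probabilities come out slightly larger than the full-vocabulary softmax, while the ranking, and therefore the greedy pick, is unchanged.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// plain softmax over a set of logits (subtracting the max for numerical stability)
static std::vector<float> softmax(std::vector<float> logits) {
    const float max_l = *std::max_element(logits.begin(), logits.end());
    float sum = 0.0f;
    for (float & l : logits) { l = std::exp(l - max_l); sum += l; }
    for (float & l : logits) { l /= sum; }
    return logits;
}

int main() {
    // toy "vocabulary" of logits, already sorted in descending order for clarity
    const std::vector<float> logits = { 8.0f, 6.5f, 5.0f, 1.0f, 0.5f, 0.0f, -1.0f, -2.0f };
    const int k = 3;

    // full-vocabulary softmax vs. softmax restricted to the top-k logits
    const std::vector<float> p_full = softmax(logits);
    const std::vector<float> p_topk = softmax(std::vector<float>(logits.begin(), logits.begin() + k));

    for (int i = 0; i < k; ++i) {
        std::printf("token %d: full-vocab prob = %.4f, top-%d prob = %.4f\n", i, p_full[i], k, p_topk[i]);
    }

    // the top-k probabilities are slightly larger because the tail mass is missing from the
    // normalization, but the ordering (and hence the greedy choice) is the same
    return 0;
}

When the distribution is peaked, the dropped tail mass is tiny, which is why the patch treats the top-k + softmax chain as a good, much cheaper approximation: it avoids sorting and normalizing over the entire vocabulary.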