sampling : add clarifying comment [no ci]

2024-09-24 09:02:54 +03:00 · 2024-09-24 09:02:54 +03:00 · e9e1c20c75
commit e9e1c20c75
parent a5a11bfbc3
1 changed files with 5 additions and 0 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@ -210,6 +210,11 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st
        }
    } else {
        if (params.n_probs > 0) {
+            // some use cases require sampling greedily while still obtaining the probabilities of the top tokens
+            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
+            //
+            // the following will not produce exactly the same probs as applying softmax to the full vocabulary, but
+            // it is much faster, since it avoids sorting all tokens, and it should give a good approximation
            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
        }