server: fix reported top tokens for temperature 0 (#7203)

This commit is contained in:
Johannes Gäßler 2024-05-11 10:11:28 +02:00 committed by GitHub
parent b83cc3f5b3
commit 5ae3426b0b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 7 additions and 7 deletions

View file

@@ -81,7 +81,7 @@ struct llama_sampling_context {
// TODO: replace with ring-buffer
std::vector<llama_token> prev;
std::vector<llama_token_data> cur;
-    size_t n_considered;
+    size_t n_valid; // Number of correct top tokens with correct probabilities.
std::mt19937 rng;
};