server: fix comment about max n_probs

Wang Haoran(Robin) 2023-06-22 08:57:35 -07:00
parent ba210e4bc7
commit ccf254bd44
2 changed files with 2 additions and 1 deletion


@@ -31,7 +31,7 @@ struct gpt_params {
     int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
     bool low_vram = 0; // if true, reduce VRAM usage at the cost of performance
-    int32_t n_probs = 0; // if greater than 1, output the probabilities of top n_probs tokens. Max 5
+    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
     // sampling parameters
     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens

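As context for the corrected comment, here is a minimal standalone sketch (illustrative only, not part of this commit; params_t is a hypothetical stand-in for gpt_params) showing why the gate is "greater than 0" rather than the old comment's "greater than 1":

#include <cstdint>
#include <cstdio>

// Hypothetical trimmed-down stand-in for the gpt_params field changed above.
struct params_t {
    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
};

int main() {
    params_t params;
    params.n_probs = 1;

    // Any value greater than 0 enables probability output, which is why
    // the old comment's "greater than 1" was off by one.
    if (params.n_probs > 0) {
        printf("will emit up to %d token probabilities\n", params.n_probs);
    }
    return 0;
}
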

@@ -354,6 +354,7 @@ struct llama_server_context {
                 result.tok = llama_sample_token(ctx, &candidates_p);
             }
         }
+        // Add maximum of 5 most probable tokens to the result
        for (size_t i = 0; i < std::min(candidates_p.size, std::min((size_t) n_probs, size_t(5))); ++i) {
            result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p});
        }
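
The added comment documents the clamp in the loop itself: the number of emitted probabilities is bounded by the candidate count, by n_probs, and by a hard cap of 5. A minimal standalone sketch of that behavior (illustrative only; token_prob and the sample data are assumptions, not llama.cpp types):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified stand-in for a sampled candidate: token id and its probability.
struct token_prob { int id; float p; };

int main() {
    // Pretend the sampler produced candidates already sorted by probability.
    std::vector<token_prob> candidates = {
        {3, 0.40f}, {7, 0.25f}, {1, 0.15f}, {9, 0.08f},
        {2, 0.05f}, {5, 0.04f}, {8, 0.02f}, {4, 0.01f},
    };

    int32_t n_probs = 8; // the user asked for 8, but the server caps output at 5

    // Same clamp shape as the diff: min(candidate count, min(n_probs, 5)).
    std::vector<token_prob> probs;
    for (size_t i = 0; i < std::min(candidates.size(), std::min((size_t) n_probs, size_t(5))); ++i) {
        probs.push_back(candidates[i]);
    }

    printf("emitted %zu probabilities\n", probs.size()); // prints: emitted 5 probabilities
    return 0;
}

Running this prints "emitted 5 probabilities": the request for 8 is clamped by the inner std::min to the hard cap of 5, which is exactly the limit the added comment calls out.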