diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 60c7656d3..38c00b7db 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -705,6 +705,11 @@ static std::vector get_token_probabilities(llama_context * ctx cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f}; } + // sort tokens by logits + std::sort(cur.begin(), cur.end(), [](const llama_token_data & a, const llama_token_data & b) { + return a.logit > b.logit; + }); + // apply softmax float max_l = cur[0].logit; float cum_sum = 0.0f; @@ -717,10 +722,5 @@ static std::vector get_token_probabilities(llama_context * ctx cur[i].p /= cum_sum; } - // sort tokens by probability - std::sort(cur.begin(), cur.end(), [](const llama_token_data & a, const llama_token_data & b) { - return a.p > b.p; - }); - return cur; }