server: fix incorrectly reported token probabilities (#7125)

* server: normalize token probabilities

* fix temperature == 0.0f
This commit is contained in:
Johannes Gäßler 2024-05-07 23:07:58 +02:00 committed by GitHub
parent b6aa670203
commit af0a5b6163
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 31 additions and 11 deletions

View file

@@ -81,6 +81,7 @@ struct llama_sampling_context {
// TODO: replace with ring-buffer
std::vector<llama_token> prev;
std::vector<llama_token_data> cur;
size_t n_considered;
std::mt19937 rng;
};