server: fix comment about max n_probs
commit ccf254bd44 (parent ba210e4bc7)
2 changed files with 2 additions and 1 deletion
@@ -31,7 +31,7 @@ struct gpt_params {
     int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
     bool low_vram = 0; // if true, reduce VRAM usage at the cost of performance
-    int32_t n_probs = 0; // if greater than 1, output the probabilities of top n_probs tokens. Max 5
+    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
 
     // sampling parameters
     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
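As the corrected comment states, probabilities are attached to a sampled token only when n_probs is greater than 0. Below is a minimal, self-contained sketch of that gate; the params_sketch and token_prob types are illustrative stand-ins, not the actual llama.cpp definitions.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative stand-ins for the real llama.cpp types (assumed for this sketch).
struct token_prob { int id; float p; };

struct params_sketch {
    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
};

int main() {
    params_sketch params;
    params.n_probs = 3; // client asked for the top-3 probabilities

    std::vector<token_prob> probs;
    if (params.n_probs > 0) {
        // Probabilities of the most likely candidates would be collected here.
        probs.push_back({42, 0.61f});
    }

    std::printf("probabilities attached: %zu\n", probs.size());
    return 0;
}
```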
@@ -354,6 +354,7 @@ struct llama_server_context {
                 result.tok = llama_sample_token(ctx, &candidates_p);
             }
         }
+        // Add maximum of 5 most probable tokens to the result
         for (size_t i = 0; i < std::min(candidates_p.size, std::min((size_t) n_probs, size_t(5))); ++i) {
             result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p});
         }
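The loop kept as context above clamps the number of reported probabilities three ways: by the number of candidates actually available, by the requested n_probs, and by a hard ceiling of 5, which is what the newly added comment documents. Here is a small self-contained sketch of that clamping; the candidate values are made up for illustration.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct candidate { int id; float p; };

int main() {
    // Made-up candidates, already sorted by probability, standing in for candidates_p.
    std::vector<candidate> candidates = {
        {11, 0.40f}, {7, 0.25f}, {3, 0.15f}, {29, 0.10f}, {5, 0.06f}, {2, 0.04f},
    };

    int32_t n_probs = 8; // ask for more than the cap to show the clamp

    // Mirrors std::min(candidates_p.size, std::min((size_t) n_probs, size_t(5))) from the diff.
    size_t n_out = std::min(candidates.size(), std::min((size_t) n_probs, size_t(5)));

    for (size_t i = 0; i < n_out; ++i) {
        std::printf("token %d: p = %.2f\n", candidates[i].id, candidates[i].p);
    }
    // With 6 candidates and n_probs = 8, only the top 5 are printed.
    return 0;
}
```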