llama : default sampling changes + greedy update (#9897)
* llama : deprecate softmax sampler + fix dist sampler ggml-ci * tests : replace macros with functions ggml-ci * sampling : change temperature sampler logic For t <= 0.0f, keep the max logit intact and set the rest to -inf * cont : no need for special "greedy" logic top-k == 1 is the same * tests : init prob correctly * llama : handle temp <= 0.0 in the temp_ext sampler too ggml-ci * cont : avoid extra loop in temperature sampler for sub-zero temp ggml-ci
This commit is contained in:
parent
bc21975084
commit
55e47786e3
7 changed files with 202 additions and 218 deletions
|
@ -217,6 +217,7 @@ extern "C" {
|
|||
|
||||
typedef struct llama_token_data_array {
|
||||
// TODO: consider SoA
|
||||
// NOTE: this pointer can be modified by the samplers
|
||||
llama_token_data * data;
|
||||
size_t size;
|
||||
int64_t selected; // this is the index in the data array (i.e. not the token id)
|
||||
|
@ -1069,12 +1070,13 @@ extern "C" {
|
|||
|
||||
// available samplers:
|
||||
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_greedy (void);
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_greedy(void);
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_dist (uint32_t seed);
|
||||
|
||||
/// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
|
||||
/// NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_softmax (void);
|
||||
DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_softmax (void),
|
||||
"will be removed in the future (see https://github.com/ggerganov/llama.cpp/pull/9896#discussion_r1800920915)");
|
||||
|
||||
/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
|
||||
|
@ -1090,6 +1092,8 @@ extern "C" {
|
|||
|
||||
/// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_typical (float p, size_t min_keep);
|
||||
|
||||
/// #details Updates the logits l_i` = l_i/t. When t <= 0.0f, the maximum logit is kept at it's original value, the rest are set to -inf
|
||||
LLAMA_API struct llama_sampler * llama_sampler_init_temp (float t);
|
||||
|
||||
/// @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue