llama : combine repetition, frequency and presence penalties in 1 call

This commit is contained in:
Georgi Gerganov 2023-10-20 17:05:46 +03:00
parent cd1e937821
commit 6e6587656f
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 51 additions and 118 deletions

View file

@@ -10,6 +10,8 @@
// sampling parameters
typedef struct llama_sampling_params {
int32_t n_prev = 256; // number of previous tokens to remember
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
int32_t top_k = 40; // <= 0 to use vocab size
float top_p = 0.95f; // 1.0 = disabled
float tfs_z = 1.00f; // 1.0 = disabled
@@ -22,11 +24,9 @@ typedef struct llama_sampling_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
int32_t n_prev = 256; // number of previous tokens to remember
bool penalize_nl = true; // consider newlines as a repeatable token
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
std::string grammar; // optional BNF-like grammar to constrain sampling
// Classifier-Free Guidance
// https://arxiv.org/abs/2306.17806
@@ -35,8 +35,6 @@ typedef struct llama_sampling_params {
std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
std::string grammar = ""; // optional BNF-like grammar to constrain sampling
} llama_sampling_params;
// general sampler context