llama : combine repetition, frequency and presence penalties in 1 call

This commit is contained in:
Georgi Gerganov 2023-10-20 17:05:46 +03:00
parent cd1e937821
commit 6e6587656f
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 51 additions and 118 deletions

View file

@@ -10,6 +10,8 @@
// sampling parameters
typedef struct llama_sampling_params {
int32_t n_prev = 256; // number of previous tokens to remember
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
int32_t top_k = 40; // <= 0 to use vocab size
float top_p = 0.95f; // 1.0 = disabled
float tfs_z = 1.00f; // 1.0 = disabled
@@ -22,11 +24,9 @@ typedef struct llama_sampling_params {
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
float mirostat_tau = 5.00f; // target entropy
float mirostat_eta = 0.10f; // learning rate
int32_t n_prev = 256; // number of previous tokens to remember
bool penalize_nl = true; // consider newlines as a repeatable token
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
std::string grammar; // optional BNF-like grammar to constrain sampling
// Classifier-Free Guidance
// https://arxiv.org/abs/2306.17806
@@ -35,8 +35,6 @@ typedef struct llama_sampling_params {
std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
std::string grammar = ""; // optional BNF-like grammar to constrain sampling
} llama_sampling_params;
// general sampler context