sampling : refactor init to use llama_sampling_params (#3696)

* sampling : refactor init to use llama_sampling_params
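
A minimal sketch of the refactored initialization flow, assuming `llama_sampling_init` now takes a `llama_sampling_params` directly (the wrapper function and its name are illustrative, not from the diff):

```cpp
#include "common.h"   // gpt_params, which now carries the llama_sampling_params member
#include "sampling.h" // llama_sampling_init / llama_sampling_free

// Illustrative only: the sampling state is built from the dedicated
// sampling params rather than from the whole gpt_params object.
void init_sampling_example(const gpt_params & params) {
    struct llama_sampling_context * ctx_sampling = llama_sampling_init(params.sparams);

    // ... run the generation loop ...

    llama_sampling_free(ctx_sampling);
}
```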

* llama : combine repetition, frequency and presence penalties in 1 call
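
The two previous penalty entry points are merged into one. A sketch of the combined call, assuming the new `llama.h` function is `llama_sample_repetition_penalties` with the renamed `penalty_*` parameters (the wrapper and the sample values are illustrative):

```cpp
#include <vector>
#include "llama.h"

// Illustrative only: repetition, frequency and presence penalties are now
// applied to the candidate logits in a single call.
void apply_penalties_example(llama_context * ctx,
                             llama_token_data_array * cur_p,
                             const std::vector<llama_token> & prev) {
    llama_sample_repetition_penalties(ctx, cur_p,
            prev.data(), prev.size(), // recent tokens to penalize, and how many
            1.10f,                    // penalty_repeat  (1.0 = disabled)
            0.00f,                    // penalty_freq    (0.0 = disabled)
            0.00f);                   // penalty_present (0.0 = disabled)
}
```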

* examples : remove embd-input and gptneox-wip

* sampling : rename penalty params + reduce size of "prev" vector
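
After the rename, the penalty fields share a `penalty_*` prefix and `n_prev` bounds how many recent tokens are remembered. A sketch of the affected fields in `llama_sampling_params`; the defaults shown are assumptions based on the library's usual values:

```cpp
struct llama_sampling_params {
    int32_t n_prev          = 64;    // number of previous tokens to remember
    int32_t penalty_last_n  = 64;    // last n tokens to penalize (0 = disabled)
    float   penalty_repeat  = 1.10f; // repetition penalty (1.0 = disabled)
    float   penalty_freq    = 0.00f; // frequency penalty  (0.0 = disabled)
    float   penalty_present = 0.00f; // presence penalty   (0.0 = disabled)
    // ... remaining fields (top_k, top_p, temp, ...) unchanged by the rename
};
```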

* sampling : add llama_sampling_print helper
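
A sketch of how the helper might be used to log the active configuration at startup, given a `gpt_params` instance `params`; `llama_sampling_print` returning a printable `std::string` is an assumption:

```cpp
// Dump the effective sampling parameters once, before generation starts.
printf("sampling: %s\n", llama_sampling_print(params.sparams).c_str());
```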

* sampling : hide prev behind API and apply #3661
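
With `prev` hidden, callers read recent tokens through accessors instead of touching the vector directly. A hedged sketch; the accessor names below (`llama_sampling_last`, `llama_sampling_prev_str`) are assumptions, not confirmed from the diff:

```cpp
// Hypothetical accessors replacing direct reads of ctx_sampling->prev:
llama_token last = llama_sampling_last(ctx_sampling);              // most recent token
std::string tail = llama_sampling_prev_str(ctx_sampling, ctx, 32); // last 32 tokens as text
```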

ggml-ci
commit d1031cf49c (parent 8cf19d60dc)
Author: Georgi Gerganov
Date:   2023-10-20 21:07:23 +03:00, committed via GitHub
30 changed files with 365 additions and 4502 deletions

```diff
@@ -56,7 +56,7 @@ struct gpt_params {
     float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
 
     // sampling parameters
-    struct llama_sampling_params sampling_params;
+    struct llama_sampling_params sparams;
 
     std::string model       = "models/7B/ggml-model-f16.gguf"; // model path
     std::string model_draft = "";                              // draft model for speculative decoding
@@ -66,7 +66,6 @@ struct gpt_params {
     std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
     std::string input_prefix      = ""; // string to prefix user inputs with
     std::string input_suffix      = ""; // string to suffix user inputs with
-    std::string grammar           = ""; // optional BNF-like grammar to constrain sampling
     std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
     std::string logdir            = ""; // directory in which to save YAML log files
```
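
The `grammar` string removed above is not lost; consistent with the commit's goal of gathering all sampling state in one struct, it is assumed to move into `llama_sampling_params`. A sketch, given a `gpt_params` instance `params`:

```cpp
// Grammar now travels with the rest of the sampling configuration:
params.sparams.grammar = R"(root ::= "yes" | "no")";
struct llama_sampling_context * ctx_sampling = llama_sampling_init(params.sparams);
```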