common : by default, move the penalties to the end of the sampling chain

ggml-ci
2024-12-12 22:29:09 +02:00 · 2024-12-12 22:29:09 +02:00 · 97261aa216
commit 97261aa216
parent 9847a375f3
2 changed files with 12 additions and 9 deletions
--- a/common/common.h
+++ b/common/common.h
@ -146,6 +146,7 @@ struct common_params_sampling {
        COMMON_SAMPLER_TYPE_MIN_P,
        COMMON_SAMPLER_TYPE_XTC,
        COMMON_SAMPLER_TYPE_TEMPERATURE,
+        COMMON_SAMPLER_TYPE_PENALTIES,
    };

    std::string grammar; // optional BNF-like grammar to constrain sampling
@ -194,9 +195,11 @@ struct common_params {

    // offload params
    std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
+
    int32_t n_gpu_layers      = -1;  // number of layers to store in VRAM (-1 - use default)
    int32_t main_gpu          = 0;   // the GPU that is used for scratch and small tensors
    float   tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
+
    enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs

    struct cpu_params cpuparams;
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@ -166,9 +166,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
            switch (cnstr) {
                case COMMON_SAMPLER_TYPE_DRY:
                    {
-                        std::vector<const char*> c_breakers;
+                        std::vector<const char *> c_breakers;
                        c_breakers.reserve(params.dry_sequence_breakers.size());
-                        for (const auto& str : params.dry_sequence_breakers) {
+                        for (const auto & str : params.dry_sequence_breakers) {
                            c_breakers.push_back(str.c_str());
                        }