From 97261aa216922172200f8ffd8729747a5185b6cf Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 12 Dec 2024 22:29:09 +0200
Subject: [PATCH] common : by default, move the penalties at the end of the
 sampling chain

ggml-ci
---
 common/common.h     | 13 ++++++++-----
 common/sampling.cpp |  8 ++++----
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/common/common.h b/common/common.h
index 8d1bbeae1..98b177e78 100644
--- a/common/common.h
+++ b/common/common.h
@@ -146,6 +146,7 @@ struct common_params_sampling {
         COMMON_SAMPLER_TYPE_MIN_P,
         COMMON_SAMPLER_TYPE_XTC,
         COMMON_SAMPLER_TYPE_TEMPERATURE,
+        COMMON_SAMPLER_TYPE_PENALTIES,
     };
 
     std::string grammar; // optional BNF-like grammar to constrain sampling
@@ -193,11 +194,13 @@ struct common_params {
     float defrag_thold = 0.1f; // KV cache defragmentation threshold
 
     // offload params
-    std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
-    int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
-    int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
-    float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
-    enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
+    std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
+
+    int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
+    int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
+    float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
+
+    enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
 
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
diff --git a/common/sampling.cpp b/common/sampling.cpp
index 6ba57cc20..e83a971c7 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -164,17 +164,17 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
     if (params.mirostat == 0) {
         for (const auto & cnstr : params.samplers) {
             switch (cnstr) {
-                case COMMON_SAMPLER_TYPE_DRY:
+            case COMMON_SAMPLER_TYPE_DRY:
                 {
-                    std::vector<const char*> c_breakers;
+                    std::vector<const char *> c_breakers;
                     c_breakers.reserve(params.dry_sequence_breakers.size());
-                    for (const auto& str : params.dry_sequence_breakers) {
+                    for (const auto & str : params.dry_sequence_breakers) {
                         c_breakers.push_back(str.c_str());
                     }
                     llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                 }
-                break;
+            break;
             case COMMON_SAMPLER_TYPE_TOP_K:
                 llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
                 break;