common : by default, move the penalties at the end of the sampling chain

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-12-12 22:29:09 +02:00
parent 9847a375f3
commit 97261aa216
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 12 additions and 9 deletions

View file

@ -146,6 +146,7 @@ struct common_params_sampling {
COMMON_SAMPLER_TYPE_MIN_P,
COMMON_SAMPLER_TYPE_XTC,
COMMON_SAMPLER_TYPE_TEMPERATURE,
COMMON_SAMPLER_TYPE_PENALTIES,
};
std::string grammar; // optional BNF-like grammar to constrain sampling
@ -193,11 +194,13 @@ struct common_params {
float defrag_thold = 0.1f; // KV cache defragmentation threshold
// offload params
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
struct cpu_params cpuparams;
struct cpu_params cpuparams_batch;

View file

@ -164,17 +164,17 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
if (params.mirostat == 0) {
for (const auto & cnstr : params.samplers) {
switch (cnstr) {
case COMMON_SAMPLER_TYPE_DRY:
case COMMON_SAMPLER_TYPE_DRY:
{
std::vector<const char*> c_breakers;
std::vector<const char *> c_breakers;
c_breakers.reserve(params.dry_sequence_breakers.size());
for (const auto& str : params.dry_sequence_breakers) {
for (const auto & str : params.dry_sequence_breakers) {
c_breakers.push_back(str.c_str());
}
llama_sampler_chain_add(result->chain, llama_sampler_init_dry (model, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
}
break;
break;
case COMMON_SAMPLER_TYPE_TOP_K:
llama_sampler_chain_add(result->chain, llama_sampler_init_top_k (params.top_k));
break;