llama : fix defrag bugs + add parameter (#5735)

* llama : fix defrag bugs + enable by default

ggml-ci

* llama : add defrag_thold parameter

ggml-ci

* llama : cont

* llama : disable log message

ggml-ci

* llama : fix graph size check during defrag
This commit is contained in:
Georgi Gerganov 2024-02-27 14:35:51 +02:00 committed by GitHub
parent cbbd1efa06
commit 9d533a77d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 82 additions and 30 deletions

View file

@ -75,6 +75,7 @@ struct gpt_params {
float yarn_beta_fast = 32.0f; // YaRN low correction dim
float yarn_beta_slow = 1.0f; // YaRN high correction dim
int32_t yarn_orig_ctx = 0; // YaRN original context length
float defrag_thold = -1.0f; // KV cache defragmentation threshold
int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;