From fde165e9685574597cfa6d11fed7f2d975373572 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Thu, 8 Aug 2024 14:02:14 +0200
Subject: [PATCH] default n_swa for phi-3

---
 src/llama.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index a7b1c9ebd..33223d73a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4892,7 +4892,6 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_PHI3:
             {
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
 
                 switch (hparams.n_layer) {
@@ -4901,6 +4900,17 @@ static void llm_load_hparams(
                     case 40: model.type = e_model::MODEL_14B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
+
+                if ((hparams.n_layer == 32 || hparams.n_layer == 40)) {
+                    if (hparams.n_ctx_train == 4096) {
+                        // default value for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct
+                        hparams.n_swa = 2047;
+                    } else if (hparams.n_ctx_train == 131072) {
+                        // default value for Phi-3-mini-128k-instruct and Phi-3-medium-128k-instruct
+                        hparams.n_swa = 131072;
+                    }
+                }
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
             } break;
         case LLM_ARCH_PLAMO:
             {