From 046c0d77a9f04269171aef16db5a7ad56f075e67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?=
Date: Sat, 14 Dec 2024 12:00:19 +0100
Subject: [PATCH] llama : use zero value of n_swa to distinguish Phi-4 from other PHI3 models

---
 src/llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 67d75bb08..7b16daaa5 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -5807,7 +5807,7 @@ static void llm_load_hparams(
                     hparams.n_swa = 131072;
                 }
                 bool found_swa = ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
-                if (!found_swa && hparams.n_swa == 0 && model.name != "Phi 4") {
+                if (!found_swa && hparams.n_swa == 0) {
                     throw std::runtime_error("invalid value for sliding_window");
                 }
             } break;
@@ -12840,7 +12840,7 @@ struct llm_build_context {
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = nullptr;
-        if (model.name == "Phi 4") {
+        if (hparams.n_swa == 0) {
             // Phi-4 doesn't use sliding window attention
             KQ_mask = build_inp_KQ_mask();
         } else {