diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3ef2f69e7..27fc9eea6 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2369,6 +2369,7 @@ class Gemma2Model(Model):
         self.gguf_writer.add_final_logit_softcapping(
             self.hparams["final_logit_softcapping"]
         )
+        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 9bfa891d5..e87c58266 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -66,6 +66,7 @@ class Keys:
         Q_LORA_RANK       = "{arch}.attention.q_lora_rank"
         KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
+        SLIDING_WINDOW    = "{arch}.attention.sliding_window"
 
     class Rope:
         DIMENSION_COUNT  = "{arch}.rope.dimension_count"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 1aeb0d9b0..75a8b2636 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -552,6 +552,9 @@ class GGUFWriter:
     def add_relative_attn_buckets_count(self, value: int) -> None:
         self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
 
+    def add_sliding_window(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
+
     def add_pooling_type(self, value: PoolingType) -> None:
         self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 6838a6fc7..d8852cfe4 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -287,7 +287,6 @@ enum llm_kv {
     LLM_KV_VOCAB_SIZE,
     LLM_KV_CONTEXT_LENGTH,
-    LLM_KV_CONTEXT_LENGTH_SWA,
     LLM_KV_EMBEDDING_LENGTH,
     LLM_KV_BLOCK_COUNT,
     LLM_KV_LEADING_DENSE_BLOCK_COUNT,
@@ -318,6 +317,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_Q_LORA_RANK,
     LLM_KV_ATTENTION_KV_LORA_RANK,
     LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
+    LLM_KV_ATTENTION_SLIDING_WINDOW,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_FREQ_BASE,
@@ -380,7 +380,6 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_VOCAB_SIZE,                "%s.vocab_size"                },
     { LLM_KV_CONTEXT_LENGTH,            "%s.context_length"            },
-    { LLM_KV_CONTEXT_LENGTH_SWA,        "%s.context_length_swa"        },
     { LLM_KV_EMBEDDING_LENGTH,          "%s.embedding_length"          },
     { LLM_KV_BLOCK_COUNT,               "%s.block_count"               },
     { LLM_KV_LEADING_DENSE_BLOCK_COUNT, "%s.leading_dense_block_count" },
@@ -411,6 +410,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_Q_LORA_RANK,            "%s.attention.q_lora_rank"            },
     { LLM_KV_ATTENTION_KV_LORA_RANK,           "%s.attention.kv_lora_rank"           },
     { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+    { LLM_KV_ATTENTION_SLIDING_WINDOW,         "%s.attention.sliding_window"         },
 
     { LLM_KV_ROPE_DIMENSION_COUNT,             "%s.rope.dimension_count"             },
     { LLM_KV_ROPE_FREQ_BASE,                   "%s.rope.freq_base"                   },
@@ -2082,7 +2082,6 @@ struct llama_hparams {
     uint32_t n_vocab;
     uint32_t n_ctx_train; // context size the model was trained on
-    int32_t  n_ctx_swa = -1; // context size for sliding window attention (SWA)
     uint32_t n_embd;
     uint32_t n_head;
     uint32_t n_head_kv;
@@ -2102,6 +2101,7 @@ struct llama_hparams {
     uint32_t n_ff_shexp = 0;
     uint32_t n_expert_shared = 0;
     float    expert_weights_scale = 0.0;
+    uint32_t n_sliding = 0; // sliding window attention (SWA)
 
     float f_norm_eps;
     float f_norm_rms_eps;
@@ -4715,8 +4715,8 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_GEMMA2:
             {
-                hparams.n_ctx_swa = 4096; // default value
-                ml.get_key(LLM_KV_CONTEXT_LENGTH_SWA, hparams.n_ctx_swa, false);
+                hparams.n_sliding = 4096; // default value of gemma 2
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_sliding, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_ATTN_LOGIT_SOFTCAPPING, hparams.f_attn_logit_softcapping, false);
                 ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping, false);
@@ -12687,11 +12687,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
                 float * data     = (float *) lctx.inp_KQ_mask->data;
                 float * data_swa = nullptr;
-                const llama_pos n_keep_swa = hparams.n_ctx_swa - batch.n_tokens;
+                const llama_pos n_keep_swa = hparams.n_sliding - batch.n_tokens;
 
                 if (lctx.model.arch == LLM_ARCH_GEMMA2) {
                     GGML_ASSERT(!lctx.inp_KQ_mask_l.empty() && "gemma 2 requires different KQ mask per layer");
-                    GGML_ASSERT(hparams.n_ctx_swa > 0);
+                    GGML_ASSERT(hparams.n_sliding > 0);
                     data_swa = (float *) lctx.inp_KQ_mask_l[0]->data;
                     data     = (float *) lctx.inp_KQ_mask_l[1]->data;
                     // because layer masks are alternate for gemma 2, we only need to take first 2 layers
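
Note (not part of the patch): a rough stand-alone sketch of the new `GGUFWriter.add_sliding_window()` helper added to gguf-py above. The output filename and the metadata-only flow are hypothetical; the real call site is `Gemma2Model.set_gguf_parameters()` in convert-hf-to-gguf.py, which passes `self.hparams["sliding_window"]`.

```python
# Sketch only: write the new "<arch>.attention.sliding_window" KV pair with gguf-py.
# Assumes a gguf-py checkout that already contains this patch.
import gguf

writer = gguf.GGUFWriter("gemma2-metadata-only.gguf", arch="gemma2")  # hypothetical output path
writer.add_sliding_window(4096)  # stored as "gemma2.attention.sliding_window" (uint32)

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```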
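
Note (not part of the patch): a minimal NumPy sketch of what the two alternating Gemma 2 masks amount to, as background for the llama.cpp hunk. The helper name `build_masks` and the 0/-inf bias convention are illustrative, not llama.cpp code; the only behaviour taken from the patch is that SWA layers additionally restrict attention to roughly the last `n_sliding` positions, while the interleaved global layers keep the plain causal mask.

```python
import numpy as np

def build_masks(n_tokens: int, n_sliding: int = 4096):
    """Illustrative only: causal mask vs. sliding-window (SWA) mask.

    0.0 means "may attend", -inf means "masked out". Gemma 2 alternates
    layers between the two mask types, which is why the patch only builds
    two KQ masks and reuses them for every layer pair.
    """
    i = np.arange(n_tokens)[:, None]  # query position
    j = np.arange(n_tokens)[None, :]  # key position

    causal = j <= i                        # standard causal attention
    swa    = causal & (i - j < n_sliding)  # additionally limit the lookback window

    to_bias = lambda keep: np.where(keep, 0.0, -np.inf).astype(np.float32)
    return to_bias(causal), to_bias(swa)

if __name__ == "__main__":
    causal_mask, swa_mask = build_masks(n_tokens=8, n_sliding=4)
    # with a window of 4, token 7 may not attend to tokens 0..3 in SWA layers
    print(swa_mask[7])
```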