From a74b1a89b380a2012365eb581c42f2c4edd66944 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Thu, 21 Dec 2023 14:18:21 +0100
Subject: [PATCH] do not offload scales

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 0c9d3f745..2ba6d78c7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -5652,8 +5652,8 @@ static const std::unordered_map<const char *, llm_offload_func_e> k_offload_map
     { "pos_embd",                   OFFLOAD_FUNC_NR  },
 
     { "inp_pos",                    OFFLOAD_FUNC_FRC }, // this is often used for KQ ops (e.g. rope)
-    { "Q_scale",                    OFFLOAD_FUNC_FRC },
-    { "KQ_scale",                   OFFLOAD_FUNC_FRC },
+    { "Q_scale",                    OFFLOAD_FUNC_NOP },
+    { "KQ_scale",                   OFFLOAD_FUNC_NOP },
     { "KQ_mask",                    OFFLOAD_FUNC_FRC },
     { "K_shift",                    OFFLOAD_FUNC_FRC },