From 6669cd8329e443ca4fae635a47da6318767b0f7b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Tue, 31 Oct 2023 08:24:07 +0200
Subject: [PATCH] llama : update offload functions for KQ tensors

---
 llama.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 75a74c5a4..e744fa217 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4856,12 +4856,13 @@ struct llm_offload_trie {
 static const std::unordered_map<const char *, llm_offload_func_e> k_offload_map = {
   //{ "inp_tokens",                 OFFLOAD_FUNC_NR  }, // TODO: missing K-quants get_rows kernel
   //{ "inp_embd",                   OFFLOAD_FUNC_NR  }, // TODO: missing K-quants get_rows kernel
-    { "inp_pos",                    OFFLOAD_FUNC_NR  },
     { "pos_embd",                   OFFLOAD_FUNC_NR  },
 
-    { "KQ_mask",                    OFFLOAD_FUNC_NR  },
-    { "K_shift",                    OFFLOAD_FUNC_NR  },
-    { "K_shifted",                  OFFLOAD_FUNC_NR  },
+    { "inp_pos",                    OFFLOAD_FUNC_KQ  }, // this is often used for KQ ops (e.g. rope)
+    { "KQ_scale",                   OFFLOAD_FUNC_KQ  },
+    { "KQ_mask",                    OFFLOAD_FUNC_KQ  },
+    { "K_shift",                    OFFLOAD_FUNC_KQ  },
+    { "K_shifted",                  OFFLOAD_FUNC_KQ  },
 
     { "inp_norm",                   OFFLOAD_FUNC_NR  },
     { "inp_norm_w",                 OFFLOAD_FUNC_NR  },