From a74b1a89b380a2012365eb581c42f2c4edd66944 Mon Sep 17 00:00:00 2001 From: slaren Date: Thu, 21 Dec 2023 14:18:21 +0100 Subject: [PATCH] do not offload scales --- llama.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama.cpp b/llama.cpp index 0c9d3f745..2ba6d78c7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -5652,8 +5652,8 @@ static const std::unordered_map k_offload_map { "pos_embd", OFFLOAD_FUNC_NR }, { "inp_pos", OFFLOAD_FUNC_FRC }, // this is often used for KQ ops (e.g. rope) - { "Q_scale", OFFLOAD_FUNC_FRC }, - { "KQ_scale", OFFLOAD_FUNC_FRC }, + { "Q_scale", OFFLOAD_FUNC_NOP }, + { "KQ_scale", OFFLOAD_FUNC_NOP }, { "KQ_mask", OFFLOAD_FUNC_FRC }, { "K_shift", OFFLOAD_FUNC_FRC },