From 6669cd8329e443ca4fae635a47da6318767b0f7b Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 31 Oct 2023 08:24:07 +0200 Subject: [PATCH] llama : update offload functions for KQ tensors --- llama.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llama.cpp b/llama.cpp index 75a74c5a4..e744fa217 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4856,12 +4856,13 @@ struct llm_offload_trie { static const std::unordered_map k_offload_map = { //{ "inp_tokens", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel //{ "inp_embd", OFFLOAD_FUNC_NR }, // TODO: missing K-quants get_rows kernel - { "inp_pos", OFFLOAD_FUNC_NR }, { "pos_embd", OFFLOAD_FUNC_NR }, - { "KQ_mask", OFFLOAD_FUNC_NR }, - { "K_shift", OFFLOAD_FUNC_NR }, - { "K_shifted", OFFLOAD_FUNC_NR }, + { "inp_pos", OFFLOAD_FUNC_KQ }, // this is often used for KQ ops (e.g. rope) + { "KQ_scale", OFFLOAD_FUNC_KQ }, + { "KQ_mask", OFFLOAD_FUNC_KQ }, + { "K_shift", OFFLOAD_FUNC_KQ }, + { "K_shifted", OFFLOAD_FUNC_KQ }, { "inp_norm", OFFLOAD_FUNC_NR }, { "inp_norm_w", OFFLOAD_FUNC_NR },