do not offload scales

This commit is contained in:
slaren 2023-12-21 14:18:21 +01:00
parent a4e191f3df
commit a74b1a89b3

View file

@@ -5652,8 +5652,8 @@ static const std::unordered_map<const char *, llm_offload_func_e> k_offload_map
{ "pos_embd", OFFLOAD_FUNC_NR }, { "pos_embd", OFFLOAD_FUNC_NR },
{ "inp_pos", OFFLOAD_FUNC_FRC }, // this is often used for KQ ops (e.g. rope) { "inp_pos", OFFLOAD_FUNC_FRC }, // this is often used for KQ ops (e.g. rope)
{ "Q_scale", OFFLOAD_FUNC_FRC }, { "Q_scale", OFFLOAD_FUNC_NOP },
{ "KQ_scale", OFFLOAD_FUNC_FRC }, { "KQ_scale", OFFLOAD_FUNC_NOP },
{ "KQ_mask", OFFLOAD_FUNC_FRC }, { "KQ_mask", OFFLOAD_FUNC_FRC },
{ "K_shift", OFFLOAD_FUNC_FRC }, { "K_shift", OFFLOAD_FUNC_FRC },