From c3efd7df73ebfe8fe371b2edf629a8cf089aaf7e Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Wed, 1 Jan 2025 16:50:37 -0500 Subject: [PATCH] Revert "subgroup iq4_nl, 3% slower than original" This reverts commit 1d949a62c63d1bd3e53df27de3f9eb0a5de83205. --- ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp | 6 ++---- ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp | 6 ++---- ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp | 4 +--- ggml/src/ggml-vulkan/vulkan-shaders/types.comp | 7 +++---- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp index 35eb0c05c..91bb8f8db 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp @@ -1,8 +1,6 @@ #if !defined(DATA_A_F32) && !defined(DATA_A_F16) #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require #endif -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_EXT_shader_subgroup_extended_types_float16 : require #include "types.comp" @@ -93,11 +91,11 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) { #if defined(DATA_A_IQ4_NL) vec2 dequantize(uint ib, uint iqs, uint a_offset) { const uint vui = uint(data_a[a_offset + ib].qs[iqs]); - return vec2(subgroupShuffle(kvalues_iq4nl, vui & 0xF), subgroupShuffle(kvalues_iq4nl, vui >> 4)); + return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]); } vec4 dequantize4(uint ib, uint iqs, uint a_offset) { const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]); - return vec4(subgroupShuffle(kvalues_iq4nl, vui & 0xF), subgroupShuffle(kvalues_iq4nl, (vui >> 4) & 0xF), subgroupShuffle(kvalues_iq4nl, (vui >> 8) & 0xF), subgroupShuffle(kvalues_iq4nl, vui >> 12)); + return vec4(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[(vui >> 4) & 0xF], kvalues_iq4nl[(vui >> 8) & 0xF], kvalues_iq4nl[vui >> 12]); } #endif diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp index 2303031d0..8de14fc03 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp @@ -1,8 +1,6 @@ #version 450 #include "dequant_head.comp" -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_EXT_shader_subgroup_extended_types_float16 : require layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; @@ -28,7 +26,7 @@ void main() { const float d = float(data_a[ib].d); [[unroll]] for (uint l = 0; l < 8; ++l) { - data_b[b_idx + l + 0] = D_TYPE(d * subgroupShuffle(kvalues_iq4nl, data_a[ib].qs[q_idx + l] & 0xF)); - data_b[b_idx + l + 16] = D_TYPE(d * subgroupShuffle(kvalues_iq4nl, data_a[ib].qs[q_idx + l] >> 4)); + data_b[b_idx + l + 0] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] & 0xF]); + data_b[b_idx + l + 16] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] >> 4]); } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp index e41194894..48122cbef 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp @@ -2,8 +2,6 @@ #extension GL_EXT_control_flow_attributes : enable #extension GL_EXT_shader_16bit_storage : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_EXT_shader_subgroup_extended_types_float16 : require #ifdef FLOAT16 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require @@ -450,7 +448,7 @@ void main() { const float d = float(data_a[ib].d); const uint vui = uint(data_a[ib].qs[iqs]); - const vec2 v = vec2(subgroupShuffle(kvalues_iq4nl, vui & 0xF), subgroupShuffle(kvalues_iq4nl, vui >> 4)) * d; + const vec2 v = vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d; buf_a[buf_idx ] = FLOAT_TYPE(v.x); buf_a[buf_idx + 16] = FLOAT_TYPE(v.y); diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp index ea576444f..04698cb4c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp @@ -3,7 +3,6 @@ #define GGML_TYPES_COMP #extension GL_EXT_shader_explicit_arithmetic_types : require -#extension GL_KHR_shader_subgroup_basic : require #if defined(DATA_A_F32) #define QUANT_K 1 @@ -306,13 +305,13 @@ const int8_t kvalues_iq4nl_const[16] = { int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113) }; -FLOAT_TYPE kvalues_iq4nl = FLOAT_TYPE(0); +shared FLOAT_TYPE kvalues_iq4nl[16]; void init_iq4nl_shmem() { // copy the table into shared memory and sync - if (gl_SubgroupInvocationID < 16) { - kvalues_iq4nl = FLOAT_TYPE(kvalues_iq4nl_const[gl_SubgroupInvocationID]); + if (gl_LocalInvocationIndex.x < 16) { + kvalues_iq4nl[gl_LocalInvocationIndex.x] = FLOAT_TYPE(kvalues_iq4nl_const[gl_LocalInvocationIndex.x]); } barrier(); }