From 5fbaf121db12aa65ca62e5bfe08a5a056ce9759f Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Wed, 4 Dec 2024 16:31:14 -0500
Subject: [PATCH] remove a multiply polynomial iq4_nl test (slower but keep as reference)

---
Review notes (this section is ignored when the patch is applied):
 * mul_mat_vec.comp: the per-row block index now comes from a running offset
   (ibi += p.ncols) instead of recomputing (first_row + n)*p.ncols each row.
 * dequant_funcs.comp: iq_helper() replaces the kvalues_iq4nl[] lookups in the
   IQ4_NL dequantize4(). The rounded cubic alone evaluates to -128, 70, 90, 112
   for codes 0, 13, 14, 15, while the table is expected to hold
   -127, 69, 89, 113 (please confirm against the kvalues_iq4nl definition).
   A +/-1 correction for those four codes is added so dequantize4() matches the
   table; dequantize() is not touched by this patch.
 * Line breaks and indentation were restored from a whitespace-collapsed copy
   of this patch; use `git apply --ignore-whitespace` if context does not match.
   The post-image blob id on the dequant_funcs.comp index line predates the
   iq_helper() correction.

 ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp | 12 +++++++++++-
 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp   |  4 +++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
index 6945b0b51..5e917ffd2 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
@@ -100,6 +100,16 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
 #endif
 
 #if defined(DATA_A_IQ4_NL)
+// Cubic approximation of the kvalues_iq4nl lookup, kept as a reference experiment.
+// The rounded cubic alone is off by one for codes 0, 13, 14 and 15, so those
+// four codes get a +/-1 correction.
+float iq_helper(uint i) {
+    const float x = float(i);
+    const float fit = round(((0.080958*x-1.875836)*x+25.907107)*x-127.663571);
+    // float(bool) is 1.0 or 0.0: add 1 for codes 0 and 15, subtract 1 for 13 and 14.
+    return fit + float(i == 0u || i == 15u) - float(i == 13u || i == 14u);
+}
+
 vec2 dequantize(uint ib, uint iqs, uint a_offset) {
     const float d = float(data_a[a_offset + ib].d);
     const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
@@ -107,6 +117,6 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
 }
 vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
     const uint vui = uint(data_a_packed16[a_offset + ib].qs[iqs/2]);
-    return vec4(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[(vui >> 4) & 0xF], kvalues_iq4nl[(vui >> 8) & 0xF], kvalues_iq4nl[(vui >> 12) & 0xF]);
+    return vec4(iq_helper(vui & 0xF), iq_helper((vui >> 4) & 0xF), iq_helper((vui >> 8) & 0xF), iq_helper((vui >> 12) & 0xF));
 }
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
index 433da8593..bca7abf3f 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
@@ -53,8 +53,10 @@ void iter(inout FLOAT_TYPE temp[NUM_ROWS], const uint first_row, const uint num_
         b1 = FLOAT_TYPE(data_b[b_offset + iybs + iqs + y_offset]);
     }
 #endif
+    uint ibi = first_row*p.ncols;
     [[unroll]] for (uint n = 0; n < num_rows; ++n) {
-        const uint ib = ((first_row + n)*p.ncols + col)/QUANT_K; // block index
+        const uint ib = (ibi + col)/QUANT_K; // block index
+        ibi += p.ncols;
 
 #if K_PER_ITER == 8
         // TODO: can we dequant as f16 instead of as vec?