From 3b3d30e4ade98185b47dd781a7c7b2e82b0353a7 Mon Sep 17 00:00:00 2001 From: niansa Date: Thu, 22 Jun 2023 13:55:25 +0200 Subject: [PATCH] Cleanups --- ggml-vulkan.cpp | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 45f426a2f..706a0ffee 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -9,6 +9,10 @@ #include #include +#ifndef __STDC_IEC_559__ +#error Your C implementation is not IEC 559 compliant, which is required for proper Vulkan interop. +#endif + typedef ggml_fp16_t half; #define MULTILINE_QUOTE(...) #__VA_ARGS__ @@ -53,25 +57,20 @@ layout(binding = 0) buffer tensorBlockQ4_0D { float16_t x_d[]; }; layout(binding = 1) buffer tensorBlockQ4_0QS { uint8_t x_qs[]; }; layout(binding = 2) buffer tensorY { float y[]; }; -// Push constants -layout(push_constant) uniform PushConstants { - int k; -} pcs; - void main() { const int qk = QK4_0; const int i = int(gl_GlobalInvocationID.x); const int j = int(gl_GlobalInvocationID.y); - const float16_t d = x_d[i]; - const uint8_t qs = x_qs[i * (QK4_0 / 2) + j]; + const float d = float(x_d[i]); + const uint8_t qs = x_qs[i * (qk / 2) + j]; const int x0 = (qs & 0x0F) - 8; const int x1 = (qs >> 4) - 8; - y[i*qk + j + 0 ] = float16_t(x0)*d; - y[i*qk + j + qk/2] = float16_t(x1)*d; + y[i*qk + j + 0 ] = float(x0)*d; + y[i*qk + j + qk/2] = float(x1)*d; } ); @@ -97,20 +96,20 @@ void ggml_vk_dequantize_row_q4_0(const void *x_, float *y, int k) { const auto x = reinterpret_cast(x_); + assert(k % qk == 0); + auto getVecBlockQ4_0D = [] (const block_q4_0 *x) { - std::vector fres; - fres.reserve(nb); + std::vector fres(nb); for (unsigned it = 0; it != nb; it++) { - fres.push_back(x[it].d); + fres[it] = x[it].d; } return fres; }; auto getVecBlockQ4_0QS = [] (const block_q4_0 *x) { - std::vector fres; - fres.resize(nb*(qk/2)); + std::vector fres(nb*(qk/2)); for (unsigned x_it = 0; x_it != nb; x_it++) { for (unsigned qs_it = 0; qs_it != qk / 2; qs_it++) { - fres.push_back(x[x_it].qs[qs_it]); + fres[x_it * (qk / 2) + qs_it] = x[x_it].qs[qs_it]; } } return fres; @@ -120,15 +119,9 @@ void ggml_vk_dequantize_row_q4_0(const void *x_, float *y, int k) { const auto tensorBlockQ4_0QS = mgr.tensorT(getVecBlockQ4_0QS(x)); const auto tensorY = mgr.tensor(std::vector(y, y+y_size)); - struct PushConsts { - int k; - } pushConsts { - k - }; - mgr.sequence() ->record({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY}) - ->record(mgr.algorithm({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY}, spirv, {nb, qk/2, 0}, {}, {0}), std::vector{pushConsts}) + ->record(mgr.algorithm({tensorBlockQ4_0D, tensorBlockQ4_0QS, tensorY}, spirv, {nb, qk/2, 0})) ->record({tensorY}) ->eval();