From 2bca812230661b8e527b4aae78cb33e45d391ed6 Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Thu, 28 Nov 2024 17:38:12 -0500
Subject: [PATCH] force 16 sequential threads per block

---
 ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
index e4ceedb1c..760aff854 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp
@@ -23,11 +23,11 @@ void main() {
     const uint num_blocks_per_row = p.ncols / QUANT_K;
     const uint ib0 = a_offset / QUANT_K + row*num_blocks_per_row;
 
-    uint it_size = gl_WorkGroupSize.x/16;
-
+    // 16 threads are used to process each block
+    const uint it_size = gl_WorkGroupSize.x/16;
     const uint tid = gl_LocalInvocationID.x;
-    const uint itid = tid/it_size;  // 0...16
-    const uint ix  = tid%it_size;
+    const uint itid = tid%16;  // 0...15
+    const uint ix  = tid/16;
 
     const uint step = 8;