From 118b4f08a80eefc07a1b196786323e0c58b69a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20O?= Date: Sat, 25 Jan 2025 12:20:35 +0100 Subject: [PATCH] Fix array length mismatches --- .../src/ggml-vulkan/vulkan-shaders/types.comp | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp index 9e29ce16e..6b874aad4 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp @@ -383,10 +383,8 @@ shared uvec2 iq2xxs_grid[256]; void init_iq_shmem() { // copy the table into shared memory and sync - if (gl_LocalInvocationIndex.x < 32) { - for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) { - iq2xxs_grid[i] = iq2xxs_grid_const[i]; - } + for (uint i = gl_LocalInvocationIndex.x; i < iq2xxs_grid.length(); i += gl_WorkGroupSize.x) { + iq2xxs_grid[i] = iq2xxs_grid_const[i]; } barrier(); } @@ -552,10 +550,8 @@ shared uvec2 iq2xs_grid[512]; void init_iq_shmem() { // copy the table into shared memory and sync - if (gl_LocalInvocationIndex.x < 32) { - for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) { - iq2xs_grid[i] = iq2xs_grid_const[i]; - } + for (uint i = gl_LocalInvocationIndex.x; i < iq2xs_grid.length(); i += gl_WorkGroupSize.x) { + iq2xs_grid[i] = iq2xs_grid_const[i]; } barrier(); } @@ -843,10 +839,8 @@ shared uvec2 iq2s_grid[1024]; void init_iq_shmem() { // copy the table into shared memory and sync - if (gl_LocalInvocationIndex.x < 32) { - for (uint i = gl_LocalInvocationIndex.x; i < 1024; i += 32) { - iq2s_grid[i] = iq2s_grid_const[i]; - } + for (uint i = gl_LocalInvocationIndex.x; i < iq2s_grid.length(); i += gl_WorkGroupSize.x) { + iq2s_grid[i] = iq2s_grid_const[i]; } barrier(); } @@ -908,15 +902,13 @@ const uint32_t iq3xxs_grid_const[256] = { 0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04, }; -shared uint32_t iq3xxs_grid[512]; +shared uint32_t iq3xxs_grid[256]; void init_iq_shmem() { // copy the table into shared memory and sync - if (gl_LocalInvocationIndex.x < 32) { - for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) { - iq3xxs_grid[i] = iq3xxs_grid_const[i]; - } + for (uint i = gl_LocalInvocationIndex.x; i < iq3xxs_grid.length(); i += gl_WorkGroupSize.x) { + iq3xxs_grid[i] = iq3xxs_grid_const[i]; } barrier(); } @@ -1022,10 +1014,8 @@ shared uint32_t iq3s_grid[512]; void init_iq_shmem() { // copy the table into shared memory and sync - if (gl_LocalInvocationIndex.x < 32) { - for (uint i = gl_LocalInvocationIndex.x; i < 512; i += 32) { - iq3s_grid[i] = iq3s_grid_const[i]; - } + for (uint i = gl_LocalInvocationIndex.x; i < iq3s_grid.length(); i += gl_WorkGroupSize.x) { + iq3s_grid[i] = iq3s_grid_const[i]; } barrier(); }