diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp index afa109262..8cdc640e8 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp @@ -16,6 +16,7 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint itid, [[unroll]] for (uint n = 0; n < num_rows; ++n) { const uint ib0 = a_offset / QUANT_K + (first_row+n)*num_blocks_per_row; + barrier(); if (!all_threads) { // when we don't have enough blocks to use all threads if (i < num_blocks_per_row) { const uint32_t scale = uint32_t(data_a[ib0 + i].scales[itid]); diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp index 58be53bb8..3116fad16 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp @@ -16,6 +16,7 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint ix, co const uint ib0 = a_offset / QUANT_K + (first_row+n)*num_blocks_per_row; if (!all_threads) { // when we don't have enough blocks to use all threads + barrier(); if (i < num_blocks_per_row) sccache[ix][v_im][itid8] = FLOAT_TYPE(int8_t(((data_a[ib0+i].scales[itid8] >> v_im4) & 0xF) | (((data_a[ib0+i].scales[itid8%4+8] >> s_shift) & 3) << 4)) - 32); barrier(); @@ -39,6 +40,7 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint ix, co const vec4 qs_u32_6 = vec4(unpack8((qs_u32 >> 6) & 0x03030303)); if (all_threads) { + barrier(); sccache[ix][v_im][itid8] = FLOAT_TYPE(int8_t(((data_a[ib0+i].scales[itid8] >> v_im4) & 0xF) | (((data_a[ib0+i].scales[itid8%4+8] >> s_shift) & 3) << 4)) - 32); barrier(); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp index b362c46bb..f05f96b5e 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp @@ -17,6 +17,7 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint itid, const uint ib0 = a_offset / QUANT_K + (first_row+n)*num_blocks_per_row; if (!all_threads) { // when we don't have enough blocks to use all threads + barrier(); if (i < num_blocks_per_row) sccache[ix][itid] = FLOAT_TYPE(data_a[ib0 + i].scales[itid]); barrier(); @@ -50,6 +51,7 @@ void calc_superblock(const uint a_offset, const uint b_offset, const uint itid, const vec4 q3 = vec4(unpack8(q3_u32)) - 32; if (all_threads) { + barrier(); sccache[ix][itid] = FLOAT_TYPE(data_a[ib0 + i].scales[itid]); barrier(); }