iq3_s: make it work on metal for QK_K = 64

Pleasant surprise: the code was already super-block-size independent,
so all it took was to delete some QK_K == 256 guards.
This commit is contained in:
Iwan Kawrakow 2024-02-23 16:20:47 +02:00
parent e6e61e3158
commit cbd950b220

View file

@ -4489,7 +4489,6 @@ void kernel_mul_mv_iq3_s_f32_impl(
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
} }
#if QK_K == 256
const int ix = tiisg; const int ix = tiisg;
device const float * y4 = y + 32 * ix; device const float * y4 = y + 32 * ix;
@ -4535,12 +4534,6 @@ void kernel_mul_mv_iq3_s_f32_impl(
y4 += 32 * 32; y4 += 32 * 32;
} }
#else
(void) x;
(void) y;
(void) yl;
(void) nb32;
#endif
for (int row = 0; row < N_DST; ++row) { for (int row = 0; row < N_DST; ++row) {
all_sum = simd_sum(sumf[row]); all_sum = simd_sum(sumf[row]);