iq3_s: make it work on metal for QK_K = 64
Pleasant surprise: the coding was super-block size independent, so all it took was to delete some QK_K == 256 guards.
This commit is contained in:
parent
e6e61e3158
commit
cbd950b220
1 changed files with 0 additions and 7 deletions
|
@ -4489,7 +4489,6 @@ void kernel_mul_mv_iq3_s_f32_impl(
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
}
|
||||
|
||||
#if QK_K == 256
|
||||
const int ix = tiisg;
|
||||
|
||||
device const float * y4 = y + 32 * ix;
|
||||
|
@ -4535,12 +4534,6 @@ void kernel_mul_mv_iq3_s_f32_impl(
|
|||
|
||||
y4 += 32 * 32;
|
||||
}
|
||||
#else
|
||||
(void) x;
|
||||
(void) y;
|
||||
(void) yl;
|
||||
(void) nb32;
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue