Fix the unaligned size

This commit is contained in:
luoyu-intel 2024-07-15 15:15:41 +08:00
parent d9b89eb308
commit 127d62fa06

View file

@ -112,6 +112,7 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
int constexpr Unroll = 2;
const int iqs = tid; // x quant index
int ncols_pad = ncols - ncols % (WarpK * Unroll);
int ncols_pad1 = ncols - ncols % (WarpK * 1);
int i = 0;
for (; i < ncols_pad; i += WarpK * Unroll) {
#pragma unroll
@ -140,7 +141,9 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
}
}
}
for (; i < ncols_pad; i += WarpK * 1) {
if (i + WarpK <= ncols_pad1)
{
for (; i < ncols_pad1; i += WarpK * 1) {
#pragma unroll
for (int iu = 0; iu < 1; iu++)
{
@ -167,6 +170,7 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
}
}
}
}
#if 1
for (; i < ncols; i += QK4_0) {
const int iybs = i; // y block start index