Fix the unaligned size
This commit is contained in:
parent
d9b89eb308
commit
127d62fa06
1 changed file with 23 additions and 19 deletions
|
@ -112,6 +112,7 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
|
||||||
int constexpr Unroll = 2;
|
int constexpr Unroll = 2;
|
||||||
const int iqs = tid; // x quant index
|
const int iqs = tid; // x quant index
|
||||||
int ncols_pad = ncols - ncols % (WarpK * Unroll);
|
int ncols_pad = ncols - ncols % (WarpK * Unroll);
|
||||||
|
int ncols_pad1 = ncols - ncols % (WarpK * 1);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (; i < ncols_pad; i += WarpK * Unroll) {
|
for (; i < ncols_pad; i += WarpK * Unroll) {
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
|
@ -140,7 +141,9 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (; i < ncols_pad; i += WarpK * 1) {
|
if (i + WarpK <= ncols_pad1)
|
||||||
|
{
|
||||||
|
for (; i < ncols_pad1; i += WarpK * 1) {
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int iu = 0; iu < 1; iu++)
|
for (int iu = 0; iu < 1; iu++)
|
||||||
{
|
{
|
||||||
|
@ -167,6 +170,7 @@ static void dequantize_mul_mat_vec_q4_0(const void * __restrict__ vx, const dflo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#if 1
|
#if 1
|
||||||
for (; i < ncols; i += QK4_0) {
|
for (; i < ncols; i += QK4_0) {
|
||||||
const int iybs = i; // y block start index
|
const int iybs = i; // y block start index
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue