Fixed dmmv dequant for k <= GGML_SYCL_DMMV_X
This commit is contained in:
parent
b60074f1c2
commit
3ba9d04c2a
1 changed file with 2 additions and 0 deletions
|
@@ -47,6 +47,7 @@ static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat *
|
||||||
|
|
||||||
for (int i = 0; i < ncols; i += iter_stride) {
|
for (int i = 0; i < ncols; i += iter_stride) {
|
||||||
const int col = i + vals_per_iter*tid;
|
const int col = i + vals_per_iter*tid;
|
||||||
|
if (col >= ncols) break;
|
||||||
const int ib = (row*ncols + col)/qk; // x block index
|
const int ib = (row*ncols + col)/qk; // x block index
|
||||||
const int iqs = (col%qk)/qr; // x quant index
|
const int iqs = (col%qk)/qr; // x quant index
|
||||||
const int iybs = col - col%qk; // y block start index
|
const int iybs = col - col%qk; // y block start index
|
||||||
|
@@ -54,6 +55,7 @@ static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat *
|
||||||
// processing >2 values per i iter is faster for fast GPUs
|
// processing >2 values per i iter is faster for fast GPUs
|
||||||
#pragma unroll
|
#pragma unroll
|
||||||
for (int j = 0; j < vals_per_iter; j += 2) {
|
for (int j = 0; j < vals_per_iter; j += 2) {
|
||||||
|
if (col + j >= ncols) break;
|
||||||
// process 2 vals per j iter
|
// process 2 vals per j iter
|
||||||
|
|
||||||
// dequantize
|
// dequantize
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue