cuda : fix out-of-bounds-access in mul_mat_vec_q
ggml-ci
This commit is contained in:
parent
36feaeb401
commit
b7ddc8bf12
1 changed files with 3 additions and 3 deletions
|
@ -5131,10 +5131,10 @@ static __global__ void mul_mat_vec_q(const void * __restrict__ vx, const void *
|
||||||
const block_q_t * x = (const block_q_t *) vx;
|
const block_q_t * x = (const block_q_t *) vx;
|
||||||
const block_q8_1 * y = (const block_q8_1 *) vy;
|
const block_q8_1 * y = (const block_q8_1 *) vy;
|
||||||
|
|
||||||
for (int i = 0; i < blocks_per_row; i += blocks_per_warp) {
|
for (int i = threadIdx.x / (qi/vdr); i < blocks_per_row; i += blocks_per_warp) {
|
||||||
const int ibx = row*blocks_per_row + i + threadIdx.x / (qi/vdr); // x block index
|
const int ibx = row*blocks_per_row + i; // x block index
|
||||||
|
|
||||||
const int iby = (i + threadIdx.x / (qi/vdr)) * (qk/QK8_1); // y block index that aligns with ibx
|
const int iby = i * (qk/QK8_1); // y block index that aligns with ibx
|
||||||
|
|
||||||
const int iqs = vdr * (threadIdx.x % (qi/vdr)); // x block quant index when casting the quants to int
|
const int iqs = vdr * (threadIdx.x % (qi/vdr)); // x block quant index when casting the quants to int
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue