CUDA: revise q8_1 data layout for mul_mat_q (#7824)
This commit is contained in:
parent
2decf57bc6
commit
42b53d192f
5 changed files with 282 additions and 151 deletions
|
@@ -11,6 +11,7 @@ void ggml_cuda_op_mul_mat_q(
|
|||
const int64_t nb01 = src0->nb[1];
|
||||
|
||||
const int64_t ne10 = src1->ne[0];
|
||||
+ const int64_t ne11 = src1->ne[1];
|
||||
GGML_ASSERT(ne10 % QK8_1 == 0);
|
||||
|
||||
const int64_t ne0 = dst->ne[0];
|
||||
|
@@ -25,7 +26,7 @@ void ggml_cuda_op_mul_mat_q(
|
|||
// nrows_dst == nrows of the matrix that the kernel writes into
|
||||
const int64_t nrows_dst = id == ctx.device ? ne0 : row_diff;
|
||||
|
||||
- const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, nrows_dst};
|
||||
+ const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst};
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_Q4_0:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue