CUDA: fix MMQ for non-contiguous src0, add tests (#10021)
* CUDA: fix MMQ for non-contiguous src0, add tests
* revise test code
This commit is contained in:
parent
0a1c750c80
commit
c39665f589
4 changed files with 73 additions and 29 deletions
|
@@ -8,8 +8,6 @@ void ggml_cuda_op_mul_mat_q(
|
|||
|
||||
const int64_t ne00 = src0->ne[0];
|
||||
|
||||
const int64_t nb01 = src0->nb[1];
|
||||
|
||||
const int64_t ne10 = src1->ne[0];
|
||||
const int64_t ne11 = src1->ne[1];
|
||||
GGML_ASSERT(ne10 % QK8_1 == 0);
|
||||
|
@@ -17,7 +15,7 @@ void ggml_cuda_op_mul_mat_q(
|
|||
const int64_t ne0 = dst->ne[0];
|
||||
|
||||
const int64_t row_diff = row_high - row_low;
|
||||
const int64_t stride00 = nb01 / ggml_type_size(src0->type);
|
||||
const int64_t stride00 = ne00 / ggml_blck_size(src0->type);
|
||||
|
||||
int id = ggml_cuda_get_device();
|
||||
const int compute_capability = ggml_cuda_info().devices[id].cc;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue