CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ * fix undefined memory reads for small matrices
This commit is contained in:
parent
2075a66a96
commit
d50f8897a7
4 changed files with 292 additions and 113 deletions
|
@ -635,7 +635,7 @@ static int64_t get_row_rounding(const std::array<float, GGML_CUDA_MAX_DEVICES> &
|
|||
}
|
||||
|
||||
const int cc = ggml_cuda_info().devices[id].cc;
|
||||
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc, get_mmq_x_max_host(cc)));
|
||||
row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc));
|
||||
}
|
||||
return row_rounding;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue