CUDA: fixed row rounding for 0 tensor splits
This commit is contained in:
parent
28cb35a0ec
commit
1b56724ae3
1 changed files with 6 additions and 2 deletions
|
@@ -7929,12 +7929,16 @@ static void ggml_cuda_op_mul_mat(
|
|||
|
||||
if (id != 0) {
|
||||
row_low[id] = ne01*g_tensor_split[id];
|
||||
row_low[id] -= row_low[id] % rounding;
|
||||
if (row_low[id] < ne01) {
|
||||
row_low[id] -= row_low[id] % rounding;
|
||||
}
|
||||
}
|
||||
|
||||
if (id != g_device_count - 1) {
|
||||
row_high[id] = ne01*g_tensor_split[id + 1];
|
||||
row_high[id] -= row_high[id] % rounding;
|
||||
if (row_high[id] < ne01) {
|
||||
row_high[id] -= row_high[id] % rounding;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue