CUDA: fixed row rounding for 0 tensor splits

This commit is contained in:
JohannesGaessler 2023-12-22 10:32:39 +01:00
parent 28cb35a0ec
commit 1b56724ae3

View file

@ -7929,15 +7929,19 @@ static void ggml_cuda_op_mul_mat(
if (id != 0) { if (id != 0) {
row_low[id] = ne01*g_tensor_split[id]; row_low[id] = ne01*g_tensor_split[id];
if (row_low[id] < ne01) {
row_low[id] -= row_low[id] % rounding; row_low[id] -= row_low[id] % rounding;
} }
}
if (id != g_device_count - 1) { if (id != g_device_count - 1) {
row_high[id] = ne01*g_tensor_split[id + 1]; row_high[id] = ne01*g_tensor_split[id + 1];
if (row_high[id] < ne01) {
row_high[id] -= row_high[id] % rounding; row_high[id] -= row_high[id] % rounding;
} }
} }
} }
}
for (int64_t id = 0; id < g_device_count; ++id) { for (int64_t id = 0; id < g_device_count; ++id) {
if ((!split && id != g_main_device) || row_low[id] == row_high[id]) { if ((!split && id != g_main_device) || row_low[id] == row_high[id]) {