This commit is contained in:
Johannes 2023-06-08 11:00:34 +02:00
parent d811633652
commit ffd76ab684

View file

@@ -1513,13 +1513,13 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
} }
} }
// There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables. // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables.
// Removing the first assert or changing the order of the arguments causes the second assert to fail. // Removing the first assert or changing the order of the arguments causes the second assert to fail.
// Removing both asserts results in i01_high becoming 0 which in turn results in garbage output. // Removing both asserts results in i01_high becoming 0 which in turn results in garbage output.
// The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be 1 (for single GPU). // The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be 1 (for single GPU).
GGML_ASSERT(i01_low == 0 || g_device_count > 1); GGML_ASSERT(i01_low == 0 || g_device_count > 1);
GGML_ASSERT(i01_high == ne01 || g_device_count > 1); GGML_ASSERT(i01_high == ne01 || g_device_count > 1);
const int64_t i01_diff = i01_high - i01_low; const int64_t i01_diff = i01_high - i01_low;
if (i01_diff == 0) { if (i01_diff == 0) {
continue; continue;