From ffd76ab684133ff42b8f2149479c9162249b33e4 Mon Sep 17 00:00:00 2001 From: Johannes Date: Thu, 8 Jun 2023 11:00:34 +0200 Subject: [PATCH] !fixup --- ggml-cuda.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index a61ea3bc9..8e6ded09b 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1513,13 +1513,13 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm } } - // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables. - // Removing the first assert or changing the order of the arguments causes the second assert to fail. + // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables. + // Removing the first assert or changing the order of the arguments causes the second assert to fail. // Removing both asserts results in i01_high becoming 0 which in turn results in garbage output. // The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be 1 (for single GPU). GGML_ASSERT(i01_low == 0 || g_device_count > 1); GGML_ASSERT(i01_high == ne01 || g_device_count > 1); - + const int64_t i01_diff = i01_high - i01_low; if (i01_diff == 0) { continue;