From e5de370cdfb68378eda3df8b845c84141fbf19e2 Mon Sep 17 00:00:00 2001 From: slaren Date: Mon, 15 Jan 2024 19:24:55 +0100 Subject: [PATCH] minor --- ggml-cuda.cu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 0ddd310f2..436a0509c 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -10778,12 +10778,18 @@ static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_ // host -> device if (ggml_backend_buffer_is_cuda_host(src->buffer) && ggml_backend_buffer_is_cuda(dst->buffer)) { ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context; + // make sure the data is ready on the source backend + // the CPU backend does not support async compute, so this does nothing at the moment + // but conceptually, it is necessary to synchronize with the source backend + ggml_backend_synchronize(backend_src); CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx_dst->device][0])); return true; } // device -> host if (ggml_backend_buffer_is_cuda_host(dst->buffer) && ggml_backend_buffer_is_cuda(src->buffer)) { + // this shouldn't happen currently because the dst backend is our own backend, which does not support host buffers + GGML_ASSERT(false); ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context; CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx_src->device][0])); return true