This commit is contained in:
slaren 2024-01-15 19:24:55 +01:00
parent e264f2239e
commit e5de370cdf

View file

@ -10778,12 +10778,18 @@ static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_
// host -> device
if (ggml_backend_buffer_is_cuda_host(src->buffer) && ggml_backend_buffer_is_cuda(dst->buffer)) {
ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;
// make sure the data is ready on the source backend
// the CPU backend does not support async compute, so this does nothing at the moment
// but conceptually, it is necessary to synchronize with the source backend
ggml_backend_synchronize(backend_src);
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
return true;
}
// device -> host
if (ggml_backend_buffer_is_cuda_host(dst->buffer) && ggml_backend_buffer_is_cuda(src->buffer)) {
// this shouldn't happen currently because the dst backend is our own backend, which does not support host buffers
GGML_ASSERT(false);
ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx_src->device][0]));
return true;