diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 9286b332c..b81d03698 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -10618,6 +10618,7 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor); if (padded_size > original_size && tensor->view_src == nullptr) { + ggml_cuda_set_device(ctx->device); CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size)); } }