diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 04b6e5285..c4e572c13 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -499,6 +499,8 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_cuda_buffer_type_alloc_buffe void * dev_ptr; cudaError_t err = cudaMalloc(&dev_ptr, size); if (err != cudaSuccess) { + // clear the error + cudaGetLastError(); fprintf(stderr, "%s: allocating %.2f MiB on device %d: cudaMalloc failed: %s\n", __func__, size/1024.0/1024.0, buft_ctx->device, cudaGetErrorString(err)); return nullptr; }