cuBLAS: fall back to pageable memory if pinned alloc fails (#1233)

* cuBLAS: fall back to pageable memory if pinned alloc fails

* cuBLAS: do not use pinned memory if env variable GGML_CUDA_NO_PINNED is set
This commit is contained in:
slaren 2023-05-01 13:32:22 +02:00 committed by GitHub
parent 90b19bd6ee
commit b925f1f1b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 9 deletions

View file

@ -355,8 +355,18 @@ cudaError_t ggml_cuda_h2d_tensor_2d(void * dst, const struct ggml_tensor * src,
}
void * ggml_cuda_host_malloc(size_t size) {
void * ptr;
CUDA_CHECK(cudaMallocHost((void **) &ptr, size));
if (getenv("GGML_CUDA_NO_PINNED") != nullptr) {
return nullptr;
}
void * ptr = nullptr;
cudaError_t err = cudaMallocHost((void **) &ptr, size);
if (err != cudaSuccess) {
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
size/1024.0/1024.0, cudaGetErrorString(err));
return nullptr;
}
return ptr;
}