cuBLAS: do not use pinned memory if env variable GGML_CUDA_NO_PINNED is set

Slaren 2023-04-29 22:25:00 +02:00
parent 08e539d5e4
commit 476f46f7cc

@@ -355,13 +355,16 @@ cudaError_t ggml_cuda_h2d_tensor_2d(void * dst, const struct ggml_tensor * src,
 }

 void * ggml_cuda_host_malloc(size_t size) {
+    if (getenv("GGML_CUDA_NO_PINNED") != nullptr) {
+        return nullptr;
+    }
     void * ptr = nullptr;
     cudaError_t err = cudaMallocHost((void **) &ptr, size);
     if (err != cudaSuccess) {
         fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
             size/1024.0/1024.0, cudaGetErrorString(err));
-        return NULL;
+        return nullptr;
     }
     return ptr;
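
Note: ggml_cuda_host_malloc already returned NULL when cudaMallocHost failed, so callers are expected to cope with a missing pinned buffer; the new early return simply reuses that path whenever GGML_CUDA_NO_PINNED is set in the environment. Below is a minimal caller-side sketch of that fallback pattern, not code from this commit: host_buffer_alloc and host_buffer_free are hypothetical helper names, and the two ggml declarations are assumed to match ggml-cuda.h of this era.

    // Hedged sketch: handle a nullptr result from ggml_cuda_host_malloc by
    // falling back to ordinary pageable memory, e.g. when running with
    // GGML_CUDA_NO_PINNED=1 or when cudaMallocHost fails.
    #include <cstddef>
    #include <cstdlib>

    // Declarations assumed to match ggml-cuda.h at the time of this commit.
    void * ggml_cuda_host_malloc(size_t size);
    void   ggml_cuda_host_free(void * ptr);

    static void * host_buffer_alloc(size_t size, bool * is_pinned) {
        void * buf = ggml_cuda_host_malloc(size); // nullptr if pinning is disabled or fails
        *is_pinned = (buf != nullptr);
        if (buf == nullptr) {
            buf = std::malloc(size); // pageable memory; H2D copies are slower but still correct
        }
        return buf;
    }

    static void host_buffer_free(void * buf, bool is_pinned) {
        if (is_pinned) {
            ggml_cuda_host_free(buf); // pinned buffers must be released via the CUDA allocator
        } else {
            std::free(buf);
        }
    }

With this pattern, running the same binary with GGML_CUDA_NO_PINNED=1 in the environment takes the pageable branch without any further code changes.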