diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 7d9f60764..c1ec306f0 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -355,13 +355,16 @@ cudaError_t ggml_cuda_h2d_tensor_2d(void * dst, const struct ggml_tensor * src, } void * ggml_cuda_host_malloc(size_t size) { - void * ptr = nullptr; + if (getenv("GGML_CUDA_NO_PINNED") != nullptr) { + return nullptr; + } + void * ptr = nullptr; cudaError_t err = cudaMallocHost((void **) &ptr, size); if (err != cudaSuccess) { fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size/1024.0/1024.0, cudaGetErrorString(err)); - return NULL; + return nullptr; } return ptr;