From d59c0b3a56e51aedbc8f9ab9bfa3f38b02d8691a Mon Sep 17 00:00:00 2001
From: Erik Garrison
Date: Wed, 13 Dec 2023 18:53:35 +0100
Subject: [PATCH] AMD ROCm: handle UMA memory VRAM expansions

This resolves #2797 by allowing ROCm AMD GPU users with a UMA
(unified memory architecture) to dynamically expand the VRAM
allocated to the GPU.

Without this, AMD ROCm users with shared CPU/GPU memory are usually
stuck with the BIOS-set (or fixed) framebuffer VRAM, making it
impossible to load more than 1-2 layers.

Note that the model is duplicated in RAM because it's loaded once
for the CPU and then copied into a second set of allocations that
are managed by the HIP UMA system. We can fix this later.
---
 ggml-cuda.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 9e1acd3f1..c959a63bd 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -58,8 +58,8 @@
 #define cudaGetDeviceProperties hipGetDeviceProperties
 #define cudaGetErrorString hipGetErrorString
 #define cudaGetLastError hipGetLastError
-#define cudaMalloc hipMalloc
-#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
+#define cudaMalloc(ptr, size) hipMallocManaged(ptr, size, hipMemAttachGlobal)
+#define cudaMallocHost(ptr, size) hipMallocHost(ptr, size)
 #define cudaMemcpy hipMemcpy
 #define cudaMemcpy2DAsync hipMemcpy2DAsync
 #define cudaMemcpyAsync hipMemcpyAsync