From d59c0b3a56e51aedbc8f9ab9bfa3f38b02d8691a Mon Sep 17 00:00:00 2001
From: Erik Garrison
Date: Wed, 13 Dec 2023 18:53:35 +0100
Subject: [PATCH] AMD ROCm: handle UMA memory VRAM expansions

This resolves #2797 by allowing ROCm AMD GPU users with a UMA
(unified memory architecture) to dynamically expand the VRAM
allocated to the GPU.

Without this, AMD ROCm users with shared CPU/GPU memory are usually
stuck with the BIOS-set (or fixed) framebuffer VRAM, making it
impossible to load more than 1-2 layers.

Note that the model is duplicated in RAM because it's loaded once
for the CPU and then copied into a second set of allocations that
are managed by the HIP UMA system. We can fix this later.
---
 ggml-cuda.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 9e1acd3f1..c959a63bd 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -58,8 +58,8 @@
 #define cudaGetDeviceProperties hipGetDeviceProperties
 #define cudaGetErrorString hipGetErrorString
 #define cudaGetLastError hipGetLastError
-#define cudaMalloc hipMalloc
-#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
+#define cudaMalloc(ptr, size) hipMallocManaged(ptr, size, hipMemAttachGlobal)
+#define cudaMallocHost(ptr, size) hipMallocHost(ptr, size)
 #define cudaMemcpy hipMemcpy
 #define cudaMemcpy2DAsync hipMemcpy2DAsync
 #define cudaMemcpyAsync hipMemcpyAsync