cmake: enable UMA-compatible allocation when LLAMA_HIP_UMA=ON
This commit is contained in:
parent
7ee8df3dc9
commit
1e946c54a2
3 changed files with 14 additions and 3 deletions
|
@ -91,6 +91,7 @@ set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for
|
|||
set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
|
||||
"llama: max. batch size for using peer access")
|
||||
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
|
||||
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
|
||||
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
||||
option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT})
|
||||
option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF)
|
||||
|
@ -372,6 +373,9 @@ if (LLAMA_HIPBLAS)
|
|||
if (${hipblas_FOUND} AND ${hip_FOUND})
|
||||
message(STATUS "HIP and hipBLAS found")
|
||||
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
|
||||
if (LLAMA_HIP_UMA)
|
||||
add_compile_definitions(GGML_HIP_UMA)
|
||||
endif()
|
||||
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set_target_properties(ggml-rocm PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
|
|
@ -421,13 +421,15 @@ Building the program with BLAS support may lead to some performance improvements
|
|||
```bash
|
||||
make LLAMA_HIPBLAS=1
|
||||
```
|
||||
- Using `CMake` for Linux.
|
||||
- Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):
|
||||
```bash
|
||||
CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \
|
||||
cmake -H. -Bbuild -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
|
||||
&& cmake --build build -- -j 16
|
||||
```
|
||||
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS):
|
||||
On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON"`.
|
||||
However, this hurts performance for non-integrated GPUs.
|
||||
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
|
||||
```bash
|
||||
set PATH=%HIP_PATH%\bin;%PATH%
|
||||
mkdir build
|
||||
|
|
|
@ -58,8 +58,13 @@
|
|||
#define cudaGetDeviceProperties hipGetDeviceProperties
|
||||
#define cudaGetErrorString hipGetErrorString
|
||||
#define cudaGetLastError hipGetLastError
|
||||
#define cudaMalloc(ptr, size) hipMallocManaged(ptr, size)
|
||||
#ifdef GGML_HIP_UMA
|
||||
#define cudaMalloc hipMallocManaged
|
||||
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
|
||||
#else
|
||||
#define cudaMalloc hipMalloc
|
||||
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
|
||||
#endif
|
||||
#define cudaMemcpy hipMemcpy
|
||||
#define cudaMemcpy2DAsync hipMemcpy2DAsync
|
||||
#define cudaMemcpyAsync hipMemcpyAsync
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue