* update HIP_UMA #7399 add use of hipMemAdviseSetCoarseGrain when LLAMA_HIP_UMA is enable. - get x2 on prompte eval and x1.5 on token gen with rocm6.0 on ryzen 7940HX iGPU (780M/gfx1103) * simplify code, more consistent style --------- Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
parent
0136966daf
commit
852aafb163
2 changed files with 17 additions and 8 deletions
|
@ -79,13 +79,8 @@
|
|||
#define cudaHostRegisterReadOnly hipHostRegisterReadOnly
|
||||
#define cudaHostUnregister hipHostUnregister
|
||||
#define cudaLaunchHostFunc hipLaunchHostFunc
|
||||
#ifdef GGML_HIP_UMA
|
||||
#define cudaMalloc hipMallocManaged
|
||||
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
|
||||
#else
|
||||
#define cudaMalloc hipMalloc
|
||||
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
|
||||
#endif
|
||||
#define cudaMemcpy hipMemcpy
|
||||
#define cudaMemcpyAsync hipMemcpyAsync
|
||||
#define cudaMemcpyPeerAsync hipMemcpyPeerAsync
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue