headers fix; add kquants_iter for hipblas and add gfx803 (#1)
* kquants_iter for hipblas and add gfx803 * Update CMakeLists.txt with hipblas kquants_iter and DMMV_F16 * remove dmmv_f16 for now
This commit is contained in:
parent
c8ae94524a
commit
bb16effc75
3 changed files with 13 additions and 5 deletions
|
@ -335,6 +335,7 @@ if (LLAMA_HIPBLAS)
|
||||||
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
|
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
|
||||||
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||||
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
|
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
|
||||||
|
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
|
|
||||||
if (LLAMA_STATIC)
|
if (LLAMA_STATIC)
|
||||||
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
||||||
|
|
11
Makefile
11
Makefile
|
@ -21,8 +21,8 @@ ifndef UNAME_M
|
||||||
UNAME_M := $(shell uname -m)
|
UNAME_M := $(shell uname -m)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CCV = $(shell $(CC) --version | head -n 1)
|
CCV := $(shell $(CC) --version | head -n 1)
|
||||||
CXXV = $(shell $(CXX) --version | head -n 1)
|
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||||
|
|
||||||
# Mac OS + Arm can report x86_64
|
# Mac OS + Arm can report x86_64
|
||||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
|
||||||
|
@ -207,13 +207,18 @@ ifdef LLAMA_HIPBLAS
|
||||||
ROCM_PATH ?= /opt/rocm
|
ROCM_PATH ?= /opt/rocm
|
||||||
CC := $(ROCM_PATH)/llvm/bin/clang
|
CC := $(ROCM_PATH)/llvm/bin/clang
|
||||||
CXX := $(ROCM_PATH)/llvm/bin/clang++
|
CXX := $(ROCM_PATH)/llvm/bin/clang++
|
||||||
GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030
|
GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030
|
||||||
LLAMA_CUDA_DMMV_X ?= 32
|
LLAMA_CUDA_DMMV_X ?= 32
|
||||||
LLAMA_CUDA_DMMV_Y ?= 1
|
LLAMA_CUDA_DMMV_Y ?= 1
|
||||||
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
||||||
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
||||||
LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
|
LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
|
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||||
|
CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
|
else
|
||||||
|
CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
|
endif
|
||||||
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
|
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
|
||||||
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
|
||||||
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
|
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
|
||||||
|
|
6
ggml.c
6
ggml.c
|
@ -230,9 +230,11 @@ inline static void* ggml_aligned_malloc(size_t size) {
|
||||||
#endif
|
#endif
|
||||||
#elif defined(GGML_USE_OPENBLAS)
|
#elif defined(GGML_USE_OPENBLAS)
|
||||||
#include <cblas.h>
|
#include <cblas.h>
|
||||||
#elif defined(GGML_USE_CUBLAS) | defined(GGML_USE_HIPBLAS)
|
#endif
|
||||||
|
#if defined(GGML_USE_CUBLAS)
|
||||||
#include "ggml-cuda.h"
|
#include "ggml-cuda.h"
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#endif
|
||||||
|
#if defined(GGML_USE_CLBLAST)
|
||||||
#include "ggml-opencl.h"
|
#include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue