headers fix; add kquants_iter for hipblas and add gfx803 (#1)

* Add kquants_iter (K_QUANTS_PER_ITERATION) for hipBLAS and add the gfx803 GPU target
* Update CMakeLists.txt with hipBLAS kquants_iter and DMMV_F16
* Remove DMMV_F16 for now
This commit is contained in:
YellowRoseCx 2023-06-28 15:27:10 -05:00 committed by GitHub
parent c8ae94524a
commit bb16effc75
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 5 deletions

View file

@ -335,6 +335,7 @@ if (LLAMA_HIPBLAS)
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX) set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas) target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::hipblas)
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
if (LLAMA_STATIC) if (LLAMA_STATIC)
message(FATAL_ERROR "Static linking not supported for HIP/ROCm") message(FATAL_ERROR "Static linking not supported for HIP/ROCm")

View file

@ -21,8 +21,8 @@ ifndef UNAME_M
UNAME_M := $(shell uname -m) UNAME_M := $(shell uname -m)
endif endif
CCV = $(shell $(CC) --version | head -n 1) CCV := $(shell $(CC) --version | head -n 1)
CXXV = $(shell $(CXX) --version | head -n 1) CXXV := $(shell $(CXX) --version | head -n 1)
# Mac OS + Arm can report x86_64 # Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
@ -207,13 +207,18 @@ ifdef LLAMA_HIPBLAS
ROCM_PATH ?= /opt/rocm ROCM_PATH ?= /opt/rocm
CC := $(ROCM_PATH)/llvm/bin/clang CC := $(ROCM_PATH)/llvm/bin/clang
CXX := $(ROCM_PATH)/llvm/bin/clang++ CXX := $(ROCM_PATH)/llvm/bin/clang++
GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030 GPU_TARGETS = gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030
LLAMA_CUDA_DMMV_X ?= 32 LLAMA_CUDA_DMMV_X ?= 32
LLAMA_CUDA_DMMV_Y ?= 1 LLAMA_CUDA_DMMV_Y ?= 1
CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C) CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C) CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
OBJS += ggml-cuda.o OBJS += ggml-cuda.o
ifdef LLAMA_CUDA_KQUANTS_ITER
CXXFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
else
CXXFLAGS += -DK_QUANTS_PER_ITERATION=2
endif
ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)

6
ggml.c
View file

@ -230,9 +230,11 @@ inline static void* ggml_aligned_malloc(size_t size) {
#endif #endif
#elif defined(GGML_USE_OPENBLAS) #elif defined(GGML_USE_OPENBLAS)
#include <cblas.h> #include <cblas.h>
#elif defined(GGML_USE_CUBLAS) | defined(GGML_USE_HIPBLAS) #endif
#if defined(GGML_USE_CUBLAS)
#include "ggml-cuda.h" #include "ggml-cuda.h"
#elif defined(GGML_USE_CLBLAST) #endif
#if defined(GGML_USE_CLBLAST)
#include "ggml-opencl.h" #include "ggml-opencl.h"
#endif #endif