Make the HIPBLAS build's GGML_CUDA_DMMV_X / GGML_CUDA_DMMV_Y defines configurable.

The values passed to ggml-cuda.o through -DGGML_CUDA_DMMV_X and
-DGGML_CUDA_DMMV_Y were hard-coded to 64 and 1. They are now taken from
LLAMA_CUDA_DMMV_X and LLAMA_CUDA_DMMV_Y, which are assigned with `?=` to the
same values (64 and 1), so a value that is already set (for example on the
make command line) is used instead of the default.

NOTE(review): leading tabs inside the hunk were reconstructed, not copied from
the original file -- confirm against the target Makefile; if the context does
not match, apply with `git apply --ignore-whitespace` or `patch -l`.

diff --git a/Makefile b/Makefile
index ea6ee2041..a5dd2a304 100644
--- a/Makefile
+++ b/Makefile
@@ -164,13 +164,15 @@ ifdef LLAMA_HIPBLAS
 	CC := $(ROCM_PATH)/llvm/bin/clang
 	CXX := $(ROCM_PATH)/llvm/bin/clang++
 	GPU_TARGETS = gfx900 gfx906 gfx908 gfx90a gfx1030
+	LLAMA_CUDA_DMMV_X ?= 64
+	LLAMA_CUDA_DMMV_Y ?= 1
 	CFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	CXXFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
 	LDFLAGS += -L/opt/rocm/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64
 	OBJS += ggml-cuda.o
 ggml-cuda.o: CXXFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS))
-ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=64
-ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=1
+ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
+ggml-cuda.o: CXXFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
 	$(CXX) $(CXXFLAGS) -x hip -c -o $@ $<
 endif