KCPP Fetches AMD ROCm Memory without a stick, CC_TURING Gets the Boot, koboldcpp_hipblas.dll Talks To The Hand, and hipBLAS Compiler Finds Its Independence! (#517)
* AMD ROCm memory fetching and max mem setting
* Update .gitignore with koboldcpp_hipblas.dll
* Update CMakeLists.txt: remove CC_TURING for AMD
* Separate hipBLAS compiler, update MMV_Y, move CXX/CC print

Separate the hipBLAS compiler, update the MMV_Y value, and move the section that prints the CXX and CC compiler names.
This commit is contained in:
parent
a62468ec4c
commit
e2e5fe56a8
4 changed files with 33 additions and 26 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -111,6 +111,7 @@ tests/test-tokenizer-1-bpe
|
||||||
rocblas.dll
|
rocblas.dll
|
||||||
hipblas.dll
|
hipblas.dll
|
||||||
koboldcpp_hipblas.so
|
koboldcpp_hipblas.so
|
||||||
|
koboldcpp_hipblas.dll
|
||||||
|
|
||||||
# Jetbrains idea folder
|
# Jetbrains idea folder
|
||||||
.idea/
|
.idea/
|
||||||
|
|
|
@ -153,7 +153,6 @@ if (LLAMA_HIPBLAS)
|
||||||
target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||||
target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
target_compile_definitions(ggml-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||||
target_compile_definitions(ggml-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
target_compile_definitions(ggml-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
target_compile_definitions(ggml-rocm PUBLIC CC_TURING=1000000000)
|
|
||||||
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||||
target_link_libraries(ggml-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
target_link_libraries(ggml-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
||||||
|
|
||||||
|
@ -165,7 +164,6 @@ if (LLAMA_HIPBLAS)
|
||||||
target_compile_definitions(ggml-v2-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
target_compile_definitions(ggml-v2-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||||
target_compile_definitions(ggml-v2-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
target_compile_definitions(ggml-v2-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||||
target_compile_definitions(ggml-v2-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
target_compile_definitions(ggml-v2-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
target_compile_definitions(ggml-v2-rocm PUBLIC CC_TURING=1000000000)
|
|
||||||
set_source_files_properties(otherarch/ggml_v2-cuda.cu PROPERTIES LANGUAGE CXX)
|
set_source_files_properties(otherarch/ggml_v2-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||||
target_link_libraries(ggml-v2-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
target_link_libraries(ggml-v2-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
||||||
|
|
||||||
|
@ -177,7 +175,6 @@ if (LLAMA_HIPBLAS)
|
||||||
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||||
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||||
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
target_compile_definitions(ggml-v2-legacy-rocm PUBLIC CC_TURING=1000000000)
|
|
||||||
set_source_files_properties(otherarch/ggml_v2-cuda-legacy.cu PROPERTIES LANGUAGE CXX)
|
set_source_files_properties(otherarch/ggml_v2-cuda-legacy.cu PROPERTIES LANGUAGE CXX)
|
||||||
target_link_libraries(ggml-v2-legacy-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
target_link_libraries(ggml-v2-legacy-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
|
||||||
|
|
||||||
|
@ -437,4 +434,4 @@ target_link_libraries(llama PRIVATE
|
||||||
${LLAMA_EXTRA_LIBS}
|
${LLAMA_EXTRA_LIBS}
|
||||||
)
|
)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
24
Makefile
24
Makefile
|
@ -196,16 +196,13 @@ endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_HIPBLAS
|
ifdef LLAMA_HIPBLAS
|
||||||
ROCM_PATH ?= /opt/rocm
|
ROCM_PATH ?= /opt/rocm
|
||||||
CC := $(ROCM_PATH)/llvm/bin/clang
|
HCC := $(ROCM_PATH)/llvm/bin/clang
|
||||||
CXX := $(ROCM_PATH)/llvm/bin/clang++
|
HCXX := $(ROCM_PATH)/llvm/bin/clang++
|
||||||
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
|
GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
|
||||||
LLAMA_CUDA_DMMV_X ?= 32
|
LLAMA_CUDA_DMMV_X ?= 32
|
||||||
LLAMA_CUDA_MMV_Y ?= 2
|
LLAMA_CUDA_MMV_Y ?= 1
|
||||||
LLAMA_CUDA_KQUANTS_ITER ?= 2
|
LLAMA_CUDA_KQUANTS_ITER ?= 2
|
||||||
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
HIPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
|
||||||
ifdef LLAMA_CUDA_FORCE_DMMV
|
|
||||||
HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
|
|
||||||
endif # LLAMA_CUDA_FORCE_DMMV
|
|
||||||
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
|
HIPLDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
|
||||||
HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
HIP_OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||||
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||||
|
@ -221,11 +218,11 @@ ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
|
||||||
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
-DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
|
||||||
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
-DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||||
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||||
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
||||||
$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
|
||||||
endif # LLAMA_HIPBLAS
|
endif # LLAMA_HIPBLAS
|
||||||
|
|
||||||
|
|
||||||
|
@ -259,8 +256,6 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
|
||||||
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CCV := $(shell $(CC) --version | head -n 1)
|
|
||||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
|
||||||
|
|
||||||
DEFAULT_BUILD =
|
DEFAULT_BUILD =
|
||||||
FAILSAFE_BUILD =
|
FAILSAFE_BUILD =
|
||||||
|
@ -281,7 +276,7 @@ ifeq ($(OS),Windows_NT)
|
||||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
|
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_HIPBLAS
|
ifdef LLAMA_HIPBLAS
|
||||||
HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
|
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.dll $(HIPLDFLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
|
@ -300,7 +295,7 @@ else
|
||||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
|
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_HIPBLAS
|
ifdef LLAMA_HIPBLAS
|
||||||
HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
|
HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o $@.so $(HIPLDFLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef LLAMA_OPENBLAS
|
ifndef LLAMA_OPENBLAS
|
||||||
|
@ -314,7 +309,8 @@ else
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
CCV := $(shell $(CC) --version | head -n 1)
|
||||||
|
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||||
|
|
||||||
#
|
#
|
||||||
# Print build information
|
# Print build information
|
||||||
|
|
29
koboldcpp.py
29
koboldcpp.py
|
@ -1086,6 +1086,7 @@ def show_new_gui():
|
||||||
from subprocess import run, CalledProcessError
|
from subprocess import run, CalledProcessError
|
||||||
FetchedCUdevices = []
|
FetchedCUdevices = []
|
||||||
FetchedCUdeviceMem = []
|
FetchedCUdeviceMem = []
|
||||||
|
AMDgpu = None
|
||||||
try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
|
try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
|
||||||
basepath = os.path.abspath(os.path.dirname(__file__))
|
basepath = os.path.abspath(os.path.dirname(__file__))
|
||||||
output = run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
output = run([((os.path.join(basepath, "winclinfo.exe")) if os.name == 'nt' else "clinfo"),"--json"], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
||||||
|
@ -1119,29 +1120,41 @@ def show_new_gui():
|
||||||
try: # Get AMD ROCm GPU names
|
try: # Get AMD ROCm GPU names
|
||||||
output = run(['rocminfo'], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
output = run(['rocminfo'], capture_output=True, text=True, check=True, encoding='utf-8').stdout
|
||||||
device_name = None
|
device_name = None
|
||||||
for line in output.splitlines():
|
for line in output.splitlines(): # read through the output line by line
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line.startswith("Marketing Name:"): device_name = line.split(":", 1)[1].strip()
|
if line.startswith("Marketing Name:"): device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
|
||||||
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: FetchedCUdevices.append(device_name)
|
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
|
||||||
|
FetchedCUdevices.append(device_name)
|
||||||
|
AMDgpu = True
|
||||||
elif line.startswith("Device Type:") and "GPU" not in line: device_name = None
|
elif line.startswith("Device Type:") and "GPU" not in line: device_name = None
|
||||||
|
if FetchedCUdevices:
|
||||||
|
getamdvram = run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8').stdout # fetch VRAM of devices
|
||||||
|
FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for idx in range(0,4):
|
for idx in range(0,4):
|
||||||
if(len(FetchedCUdevices)>idx):
|
if(len(FetchedCUdevices)>idx):
|
||||||
CUDevicesNames[idx] = FetchedCUdevices[idx]
|
CUDevicesNames[idx] = FetchedCUdevices[idx]
|
||||||
MaxMemory[0] = max(int(FetchedCUdeviceMem[idx])*1024*1024,MaxMemory[0])
|
if AMDgpu:
|
||||||
pass
|
MaxMemory[0] = max(int(FetchedCUdeviceMem[idx]),MaxMemory[0])
|
||||||
|
else:
|
||||||
|
MaxMemory[0] = max(int(FetchedCUdeviceMem[idx])*1024*1024,MaxMemory[0])
|
||||||
|
pass
|
||||||
|
|
||||||
#autopick cublas if suitable
|
#autopick cublas if suitable
|
||||||
global exitcounter
|
global exitcounter
|
||||||
if exitcounter < 100 and MaxMemory[0]>3500000000 and CUDevicesNames[0]!="" and "Use CuBLAS" in runopts and runopts_var.get()=="Use OpenBLAS":
|
if exitcounter < 100 and MaxMemory[0]>3500000000 and CUDevicesNames[0]!="" and "Use CuBLAS" or "Use hipBLAS (ROCM)" in runopts and runopts_var.get()=="Use OpenBLAS":
|
||||||
runopts_var.set("Use CuBLAS")
|
if "Use CuBLAS" in runopts:
|
||||||
pass
|
runopts_var.set("Use CuBLAS")
|
||||||
|
pass
|
||||||
|
elif "Use hipBLAS (ROCM)" in runopts:
|
||||||
|
runopts_var.set("Use hipBLAS (ROCM)")
|
||||||
|
|
||||||
changed_gpu_choice_var()
|
changed_gpu_choice_var()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def autoset_gpu_layers(filepath): #shitty algo to determine how many layers to use
|
def autoset_gpu_layers(filepath): #shitty algo to determine how many layers to use
|
||||||
try:
|
try:
|
||||||
global gui_layers_untouched
|
global gui_layers_untouched
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue