option for cublas
This commit is contained in:
parent
b4698abafc
commit
b084f4dc46
3 changed files with 50 additions and 29 deletions
|
@ -289,12 +289,12 @@ if (GGML_CUDA_SOURCES)
|
||||||
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
|
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(TARGET koboldcpp)
|
set(TARGET koboldcpp_cublas)
|
||||||
add_library(${TARGET} SHARED expose.cpp expose.h)
|
add_library(${TARGET} SHARED expose.cpp expose.h)
|
||||||
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples)
|
target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples)
|
||||||
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
|
target_compile_features(${TARGET} PUBLIC cxx_std_11) # don't bump
|
||||||
set_target_properties(${TARGET} PROPERTIES PREFIX "")
|
set_target_properties(${TARGET} PROPERTIES PREFIX "")
|
||||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp")
|
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
|
||||||
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
target_link_libraries(${TARGET} PUBLIC ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PUBLIC ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${CMAKE_THREAD_LIBS_INIT})
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
|
|
46
Makefile
46
Makefile
|
@ -1,4 +1,4 @@
|
||||||
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
|
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast koboldcpp_cublas
|
||||||
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
|
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
|
||||||
dev: koboldcpp_openblas
|
dev: koboldcpp_openblas
|
||||||
dev2: koboldcpp_clblast
|
dev2: koboldcpp_clblast
|
||||||
|
@ -53,6 +53,9 @@ NONECFLAGS =
|
||||||
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||||
CLBLAST_FLAGS = -DGGML_USE_CLBLAST
|
CLBLAST_FLAGS = -DGGML_USE_CLBLAST
|
||||||
FAILSAFE_FLAGS = -DUSE_FAILSAFE
|
FAILSAFE_FLAGS = -DUSE_FAILSAFE
|
||||||
|
CUBLAS_FLAGS =
|
||||||
|
CUBLASLD_FLAGS =
|
||||||
|
CUBLAS_OBJS =
|
||||||
|
|
||||||
#lets try enabling everything
|
#lets try enabling everything
|
||||||
CFLAGS += -pthread -s
|
CFLAGS += -pthread -s
|
||||||
|
@ -133,10 +136,9 @@ endif
|
||||||
|
|
||||||
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
|
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better
|
||||||
ifdef LLAMA_CUBLAS
|
ifdef LLAMA_CUBLAS
|
||||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CUBLASLD_FLAGS = -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
CUBLAS_OBJS = ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
||||||
OBJS += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
|
|
||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
|
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
|
||||||
ifdef LLAMA_CUDA_DMMV_X
|
ifdef LLAMA_CUDA_DMMV_X
|
||||||
|
@ -158,11 +160,11 @@ else
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||||
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
|
||||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||||
endif # LLAMA_CUBLAS
|
endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_METAL
|
ifdef LLAMA_METAL
|
||||||
|
@ -199,7 +201,7 @@ FAILSAFE_BUILD =
|
||||||
OPENBLAS_BUILD =
|
OPENBLAS_BUILD =
|
||||||
OPENBLAS_NOAVX2_BUILD =
|
OPENBLAS_NOAVX2_BUILD =
|
||||||
CLBLAST_BUILD =
|
CLBLAST_BUILD =
|
||||||
CLBLAST_NOAVX2_BUILD =
|
CUBLAS_BUILD =
|
||||||
|
|
||||||
ifeq ($(OS),Windows_NT)
|
ifeq ($(OS),Windows_NT)
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
||||||
|
@ -207,7 +209,6 @@ ifeq ($(OS),Windows_NT)
|
||||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
|
||||||
else
|
else
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
|
@ -218,19 +219,24 @@ else
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
|
||||||
else
|
else
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef LLAMA_OPENBLAS
|
ifndef LLAMA_OPENBLAS
|
||||||
ifndef LLAMA_CLBLAST
|
ifndef LLAMA_CLBLAST
|
||||||
|
ifndef LLAMA_CUBLAS
|
||||||
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
|
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
#both windows and non windows
|
||||||
|
ifdef LLAMA_CUBLAS
|
||||||
|
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.dll $(CUBLASLD_FLAGS) $(LDFLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Print build information
|
# Print build information
|
||||||
|
@ -261,8 +267,8 @@ ggml_openblas_noavx2.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
||||||
ggml_clblast.o: ggml.c ggml.h
|
ggml_clblast.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml_clblast_noavx2.o: ggml.c ggml.h
|
ggml_cublas.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#quants K
|
#quants K
|
||||||
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||||
|
@ -283,8 +289,8 @@ ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
||||||
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#extreme old version compat
|
#extreme old version compat
|
||||||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
||||||
|
@ -313,9 +319,11 @@ gpttype_adapter.o: gpttype_adapter.cpp
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
gpttype_adapter_clblast.o: gpttype_adapter.cpp
|
gpttype_adapter_clblast.o: gpttype_adapter.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
gpttype_adapter_cublas.o: gpttype_adapter.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so
|
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so
|
||||||
|
|
||||||
main: examples/main/main.cpp build-info.h ggml.o k_quants.o llama.o common.o $(OBJS)
|
main: examples/main/main.cpp build-info.h ggml.o k_quants.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
@ -334,8 +342,8 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml
|
||||||
$(OPENBLAS_NOAVX2_BUILD)
|
$(OPENBLAS_NOAVX2_BUILD)
|
||||||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS)
|
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
koboldcpp_clblast_noavx2: ggml_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants_noavx2.o $(OBJS)
|
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o $(CUBLAS_OBJS) $(OBJS)
|
||||||
$(CLBLAST_NOAVX2_BUILD)
|
$(CUBLAS_BUILD)
|
||||||
|
|
||||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o
|
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
|
29
koboldcpp.py
29
koboldcpp.py
|
@ -77,15 +77,17 @@ lib_failsafe = pick_existant_file("koboldcpp_failsafe.dll","koboldcpp_failsafe.s
|
||||||
lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so")
|
lib_openblas = pick_existant_file("koboldcpp_openblas.dll","koboldcpp_openblas.so")
|
||||||
lib_openblas_noavx2 = pick_existant_file("koboldcpp_openblas_noavx2.dll","koboldcpp_openblas_noavx2.so")
|
lib_openblas_noavx2 = pick_existant_file("koboldcpp_openblas_noavx2.dll","koboldcpp_openblas_noavx2.so")
|
||||||
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
|
lib_clblast = pick_existant_file("koboldcpp_clblast.dll","koboldcpp_clblast.so")
|
||||||
|
lib_cublas = pick_existant_file("koboldcpp_cublas.dll","koboldcpp_cublas.so")
|
||||||
|
|
||||||
|
|
||||||
def init_library():
|
def init_library():
|
||||||
global handle
|
global handle
|
||||||
global lib_default,lib_failsafe,lib_openblas,lib_openblas_noavx2,lib_clblast
|
global lib_default,lib_failsafe,lib_openblas,lib_openblas_noavx2,lib_clblast,lib_cublas
|
||||||
|
|
||||||
libname = ""
|
libname = ""
|
||||||
use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
|
use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
|
||||||
use_clblast = False #uses CLBlast instead
|
use_clblast = False #uses CLBlast instead
|
||||||
|
use_cublas = False #uses cublas instead
|
||||||
use_noavx2 = False #uses openblas with no avx2 instructions
|
use_noavx2 = False #uses openblas with no avx2 instructions
|
||||||
|
|
||||||
if args.noavx2:
|
if args.noavx2:
|
||||||
|
@ -103,6 +105,12 @@ def init_library():
|
||||||
else:
|
else:
|
||||||
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
|
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
|
||||||
use_clblast = True
|
use_clblast = True
|
||||||
|
elif args.usecublas:
|
||||||
|
if not file_exists(lib_cublas):
|
||||||
|
print("Warning: CuBLAS library file not found. Non-BLAS library will be used.")
|
||||||
|
else:
|
||||||
|
print("Attempting to use CuBLAS library for faster prompt ingestion. A compatible CuBLAS will be required.")
|
||||||
|
use_cublas = True
|
||||||
else:
|
else:
|
||||||
if not file_exists(lib_openblas) or (os.name=='nt' and not file_exists("libopenblas.dll")):
|
if not file_exists(lib_openblas) or (os.name=='nt' and not file_exists("libopenblas.dll")):
|
||||||
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
|
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
|
||||||
|
@ -122,6 +130,8 @@ def init_library():
|
||||||
else:
|
else:
|
||||||
if use_clblast:
|
if use_clblast:
|
||||||
libname = lib_clblast
|
libname = lib_clblast
|
||||||
|
if use_cublas:
|
||||||
|
libname = lib_cublas
|
||||||
elif use_blas:
|
elif use_blas:
|
||||||
libname = lib_openblas
|
libname = lib_openblas
|
||||||
else:
|
else:
|
||||||
|
@ -581,13 +591,13 @@ def show_gui():
|
||||||
blaschoice = tk.StringVar()
|
blaschoice = tk.StringVar()
|
||||||
blaschoice.set("BLAS = 512")
|
blaschoice.set("BLAS = 512")
|
||||||
|
|
||||||
runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use CuBLAS GPU","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
||||||
runchoice = tk.StringVar()
|
runchoice = tk.StringVar()
|
||||||
runchoice.set("Use OpenBLAS")
|
runchoice.set("Use OpenBLAS")
|
||||||
|
|
||||||
def onDropdownChange(event):
|
def onDropdownChange(event):
|
||||||
sel = runchoice.get()
|
sel = runchoice.get()
|
||||||
if sel==runopts[1] or sel==runopts[2] or sel==runopts[3]:
|
if sel==runopts[1] or sel==runopts[2] or sel==runopts[3] or sel==runopts[4]:
|
||||||
frameC.grid(row=4,column=0,pady=4)
|
frameC.grid(row=4,column=0,pady=4)
|
||||||
else:
|
else:
|
||||||
frameC.grid_forget()
|
frameC.grid_forget()
|
||||||
|
@ -609,7 +619,7 @@ def show_gui():
|
||||||
frameC = tk.Frame(root)
|
frameC = tk.Frame(root)
|
||||||
gpu_layers_var=tk.StringVar()
|
gpu_layers_var=tk.StringVar()
|
||||||
gpu_layers_var.set("0")
|
gpu_layers_var.set("0")
|
||||||
gpu_lbl = tk.Label(frameC, text = 'GPU Layers (CLBlast only): ', font=('calibre',10, 'bold'))
|
gpu_lbl = tk.Label(frameC, text = 'GPU Layers: ', font=('calibre',10, 'bold'))
|
||||||
gpu_layers_input = tk.Entry(frameC,textvariable = gpu_layers_var, font=('calibre',10,'normal'))
|
gpu_layers_input = tk.Entry(frameC,textvariable = gpu_layers_var, font=('calibre',10,'normal'))
|
||||||
gpu_lbl.grid(row=0,column=0)
|
gpu_lbl.grid(row=0,column=0)
|
||||||
gpu_layers_input.grid(row=0,column=1)
|
gpu_layers_input.grid(row=0,column=1)
|
||||||
|
@ -663,11 +673,13 @@ def show_gui():
|
||||||
if selrunchoice==runopts[3]:
|
if selrunchoice==runopts[3]:
|
||||||
args.useclblast = [0,1]
|
args.useclblast = [0,1]
|
||||||
if selrunchoice==runopts[4]:
|
if selrunchoice==runopts[4]:
|
||||||
args.noblas = True
|
args.usecublas = True
|
||||||
if selrunchoice==runopts[5]:
|
if selrunchoice==runopts[5]:
|
||||||
args.noavx2 = True
|
args.noblas = True
|
||||||
if selrunchoice==runopts[6]:
|
if selrunchoice==runopts[6]:
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
|
if selrunchoice==runopts[7]:
|
||||||
|
args.noavx2 = True
|
||||||
args.noblas = True
|
args.noblas = True
|
||||||
args.nommap = True
|
args.nommap = True
|
||||||
print("[Failsafe Mode : mmap is disabled.]")
|
print("[Failsafe Mode : mmap is disabled.]")
|
||||||
|
@ -861,7 +873,8 @@ if __name__ == '__main__':
|
||||||
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength and max ctxlen.",metavar=('[hordename]', '[hordelength] [hordectx]'), nargs='+')
|
parser.add_argument("--hordeconfig", help="Sets the display model name to something else, for easy use on AI Horde. Optional additional parameters set the horde max genlength and max ctxlen.",metavar=('[hordename]', '[hordelength] [hordectx]'), nargs='+')
|
||||||
compatgroup = parser.add_mutually_exclusive_group()
|
compatgroup = parser.add_mutually_exclusive_group()
|
||||||
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
|
||||||
compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
compatgroup.add_argument("--useclblast", help="Use CLBlast for GPU Acceleration. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
|
||||||
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using CLBlast. Requires CLBlast.",metavar=('[GPU layers]'), type=int, default=0)
|
compatgroup.add_argument("--usecublas", help="Use CuBLAS for GPU Acceleration. Requires Nvidia GPU.", action='store_true')
|
||||||
|
parser.add_argument("--gpulayers", help="Set number of layers to offload to GPU when using GPU. Requires GPU.",metavar=('[GPU layers]'), type=int, default=0)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args)
|
main(args)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue