Trying to put out penguin-based fires. Sorry for the inconvenience.

Concedo 2023-04-20 23:15:07 +08:00
parent 49697d86d8
commit 7ba36c2c6c
4 changed files with 30 additions and 12 deletions

View file

@@ -1,4 +1,4 @@
default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
simple: koboldcpp koboldcpp_noavx2
dev: koboldcpp_openblas
@@ -15,9 +15,13 @@ ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
ifndef ARCH_LINUX
ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
ARCH_LIKE := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
ARCH_LINUX1 := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
ARCH_LINUX2 := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
ifdef ARCH_LINUX1
ARCH_ADD = -lcblas
endif
ifdef ARCH_LINUX2
ARCH_ADD = -lcblas
endif
CCV := $(shell $(CC) --version | head -n 1)
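
The hunk above replaces the single Arch Linux grep with two checks of /etc/os-release, one for Arch itself and one for ID_LIKE=arch derivatives (Manjaro, EndeavourOS and the like), and sets ARCH_ADD = -lcblas when either matches, presumably because those distros ship the CBLAS interface as a separate library rather than inside libopenblas. A minimal Python sketch of the same detection, purely for illustration (extra_blas_libs is not a function in the repo):

# Minimal sketch, not part of the repo: mirrors the Makefile's Arch detection in Python.
import os

def extra_blas_libs(os_release="/etc/os-release"):
    # Same two patterns the Makefile greps for: Arch Linux itself and ID_LIKE=arch derivatives.
    arch_add = []
    if os.path.exists(os_release):
        with open(os_release) as f:
            content = f.read()
        if "Arch Linux" in content or "ID_LIKE=arch" in content:
            arch_add.append("-lcblas")  # CBLAS ships separately on Arch-like distros
    return arch_add

print(extra_blas_libs())  # e.g. ['-lcblas'] on Manjaro, [] on Debian/Ubuntu
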
@@ -51,6 +55,7 @@ BONUSCFLAGS2 =
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
CLBLAST_FLAGS = -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include
#let's try enabling everything
CFLAGS += -pthread -s
@@ -152,6 +157,7 @@ NOAVX2_BUILD =
OPENBLAS_BUILD =
OPENBLAS_NOAVX2_BUILD =
CLBLAST_BUILD =
CUBLAS_BUILD =
ifeq ($(OS),Windows_NT)
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
@@ -159,22 +165,28 @@ ifeq ($(OS),Windows_NT)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
else
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
ifdef LLAMA_OPENBLAS
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
endif
ifdef LLAMA_CLBLAST
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
endif
ifdef LLAMA_CUBLAS
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcublas_static -lculibos -lcudart_static -lcublasLt_static -lpthread -ldl -L/usr/local/cuda/lib64 -shared -o $@.so $(LDFLAGS)
endif
ifndef LLAMA_OPENBLAS
ifndef LLAMA_CLBLAST
ifndef LLAMA_CUBLAS
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
endif
endif
endif
endif
#
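
On non-Windows systems the link rules now pull in $(ARCH_ADD) instead of a hard-coded -lcblas, so the extra library is only requested where the detection above set it, and the three nested ifndef blocks make the "install a BLAS library" reminder fire only when none of LLAMA_OPENBLAS, LLAMA_CLBLAST or LLAMA_CUBLAS is given. A rough Python model of that selection logic, with illustrative names that are not part of the repo:

# Rough model of the non-Windows build selection above (illustrative only, not repo code).
def openblas_build_cmd(make_vars):
    # make_vars: dict of Make variables, e.g. {"LLAMA_OPENBLAS": "1", "ARCH_ADD": "-lcblas"}
    if "LLAMA_OPENBLAS" in make_vars:
        arch_add = make_vars.get("ARCH_ADD", "")  # "-lcblas" only on Arch-like systems
        return f"$(CXX) $(CXXFLAGS) $^ {arch_add} -lopenblas -shared -o $@.so $(LDFLAGS)"
    if not any(k in make_vars for k in ("LLAMA_OPENBLAS", "LLAMA_CLBLAST", "LLAMA_CUBLAS")):
        return "@echo 'reminder: set LLAMA_OPENBLAS=1 or LLAMA_CLBLAST=1 for faster speeds'"
    return None  # another accelerated build (CLBlast/cuBLAS) was requested instead

print(openblas_build_cmd({"LLAMA_OPENBLAS": "1", "ARCH_ADD": "-lcblas"}))
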
@@ -211,6 +223,9 @@ ggml_openblas_noavx2.o: ggml.c ggml.h
ggml_clblast.o: ggml.c ggml.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
ggml_cublas.o: ggml.c ggml.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CUBLAS_FLAGS) -c $< -o $@
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
@@ -258,6 +273,9 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o e
koboldcpp_clblast: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CLBLAST_BUILD)
koboldcpp_cublas: ggml_cublas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CUBLAS_BUILD)
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
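
The new koboldcpp_cublas target links the CUDA-enabled object the same way the CLBlast target links its own, and per the conditionals above it only gets a real build command on Linux when LLAMA_CUBLAS=1 is passed to make. A small, purely illustrative driver for that invocation (the subprocess wrapper is an assumption, not repo tooling):

# Illustrative driver: invoke the new Makefile target with the CUDA switch enabled.
import subprocess

def build_cublas_lib(jobs=4):
    # LLAMA_CUBLAS=1 enables CUBLAS_BUILD on non-Windows platforms (see the Makefile hunks above).
    cmd = ["make", f"-j{jobs}", "LLAMA_CUBLAS=1", "koboldcpp_cublas"]
    return subprocess.run(cmd, check=False).returncode

if __name__ == "__main__":
    raise SystemExit(build_cublas_lib())
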

View file

@@ -372,7 +372,7 @@ def main(args):
if args.noavx2:
use_noavx2 = True
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
@@ -380,13 +380,13 @@
use_blas = True
print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.")
elif args.useclblast:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll"))):
print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
else:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to library without OpenBLAS.")

View file

@@ -371,7 +371,7 @@ bool gpt2_eval(
const int n_vocab = hparams.n_vocab;
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
static size_t buf_size = 512u*1024*1024;
static size_t buf_size = 256u*1024*1024;
static void * buf = malloc(buf_size);
if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {

View file

@@ -382,7 +382,7 @@ bool gptj_eval(
const int d_key = n_embd/n_head;
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
static size_t buf_size = 512u*1024*1024;
static size_t buf_size = 256u*1024*1024;
static void * buf = malloc(buf_size);
if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {