trying to put out penguin based fires. sorry for inconvenience
This commit is contained in:
parent
49697d86d8
commit
7ba36c2c6c
4 changed files with 30 additions and 12 deletions
32
Makefile
32
Makefile
|
@ -1,4 +1,4 @@
|
|||
default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
|
||||
default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
|
||||
simple: koboldcpp koboldcpp_noavx2
|
||||
dev: koboldcpp_openblas
|
||||
|
||||
|
@ -15,9 +15,13 @@ ifndef UNAME_M
|
|||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
ifndef ARCH_LINUX
|
||||
ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
|
||||
ARCH_LIKE := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
|
||||
ARCH_LINUX1 := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
|
||||
ARCH_LINUX2 := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
|
||||
ifdef ARCH_LINUX1
|
||||
ARCH_ADD = -lcblas
|
||||
endif
|
||||
ifdef ARCH_LINUX2
|
||||
ARCH_ADD = -lcblas
|
||||
endif
|
||||
|
||||
CCV := $(shell $(CC) --version | head -n 1)
|
||||
|
@ -51,6 +55,7 @@ BONUSCFLAGS2 =
|
|||
|
||||
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||
CLBLAST_FLAGS = -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
|
||||
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include
|
||||
|
||||
#lets try enabling everything
|
||||
CFLAGS += -pthread -s
|
||||
|
@ -152,6 +157,7 @@ NOAVX2_BUILD =
|
|||
OPENBLAS_BUILD =
|
||||
OPENBLAS_NOAVX2_BUILD =
|
||||
CLBLAST_BUILD =
|
||||
CUBLAS_BUILD =
|
||||
|
||||
ifeq ($(OS),Windows_NT)
|
||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
||||
|
@ -159,22 +165,28 @@ ifeq ($(OS),Windows_NT)
|
|||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
||||
else
|
||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||
ifdef LLAMA_OPENBLAS
|
||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
endif
|
||||
ifdef LLAMA_CLBLAST
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
endif
|
||||
ifdef LLAMA_CUBLAS
|
||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcublas_static -lculibos -lcudart_static -lcublasLt_static -lpthread -ldl -L/usr/local/cuda/lib64 -shared -o $@.so $(LDFLAGS)
|
||||
endif
|
||||
|
||||
ifndef LLAMA_OPENBLAS
|
||||
ifndef LLAMA_CLBLAST
|
||||
ifndef LLAMA_CUBLAS
|
||||
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
|
@ -211,6 +223,9 @@ ggml_openblas_noavx2.o: ggml.c ggml.h
|
|||
ggml_clblast.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
|
||||
|
||||
ggml_cublas.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CUBLAS_FLAGS) -c $< -o $@
|
||||
|
||||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
||||
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
|
||||
|
||||
|
@ -258,6 +273,9 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o e
|
|||
|
||||
koboldcpp_clblast: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(CLBLAST_BUILD)
|
||||
|
||||
koboldcpp_cublas: ggml_cublas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
|
||||
$(CUBLAS_BUILD)
|
||||
|
||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
|
|
|
@ -372,7 +372,7 @@ def main(args):
|
|||
|
||||
if args.noavx2:
|
||||
use_noavx2 = True
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
|
||||
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
|
||||
elif args.noblas:
|
||||
print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
|
||||
|
@ -380,13 +380,13 @@ def main(args):
|
|||
use_blas = True
|
||||
print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.")
|
||||
elif args.useclblast:
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) and os.name=='nt'):
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll"))):
|
||||
print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
|
||||
else:
|
||||
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
|
||||
use_clblast = True
|
||||
else:
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
|
||||
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
|
||||
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
|
||||
elif args.noblas:
|
||||
print("Attempting to library without OpenBLAS.")
|
||||
|
|
|
@ -371,7 +371,7 @@ bool gpt2_eval(
|
|||
const int n_vocab = hparams.n_vocab;
|
||||
|
||||
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
||||
static size_t buf_size = 512u*1024*1024;
|
||||
static size_t buf_size = 256u*1024*1024;
|
||||
static void * buf = malloc(buf_size);
|
||||
|
||||
if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {
|
||||
|
|
|
@ -382,7 +382,7 @@ bool gptj_eval(
|
|||
const int d_key = n_embd/n_head;
|
||||
|
||||
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
|
||||
static size_t buf_size = 512u*1024*1024;
|
||||
static size_t buf_size = 256u*1024*1024;
|
||||
static void * buf = malloc(buf_size);
|
||||
|
||||
if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue