Trying to put out penguin-based fires. Sorry for the inconvenience.

Concedo 2023-04-20 23:15:07 +08:00
parent 49697d86d8
commit 7ba36c2c6c
4 changed files with 30 additions and 12 deletions

View file

@@ -1,4 +1,4 @@
default: koboldcpp koboldcpp_noavx2 koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
simple: koboldcpp koboldcpp_noavx2
dev: koboldcpp_openblas
@@ -15,9 +15,13 @@ ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
ifndef ARCH_LINUX
ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
ARCH_LIKE := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
ARCH_LINUX1 := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
ARCH_LINUX2 := $(shell grep "ID_LIKE=arch" /etc/os-release 2>/dev/null)
ifdef ARCH_LINUX1
ARCH_ADD = -lcblas
endif
ifdef ARCH_LINUX2
ARCH_ADD = -lcblas
endif
CCV := $(shell $(CC) --version | head -n 1)
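
The hunk above replaces the single Arch Linux grep with two checks of /etc/os-release, one for Arch itself and one for ID_LIKE=arch derivatives (Manjaro, EndeavourOS and the like), and sets ARCH_ADD = -lcblas when either matches, presumably because those distros ship the CBLAS interface as a separate library rather than inside libopenblas. A minimal Python sketch of the same detection, purely for illustration (extra_blas_libs is not a function in the repo):

# Minimal sketch, not part of the repo: mirrors the Makefile's Arch detection in Python.
import os

def extra_blas_libs(os_release="/etc/os-release"):
    # Same two patterns the Makefile greps for: Arch Linux itself and ID_LIKE=arch derivatives.
    arch_add = []
    if os.path.exists(os_release):
        with open(os_release) as f:
            content = f.read()
        if "Arch Linux" in content or "ID_LIKE=arch" in content:
            arch_add.append("-lcblas")  # CBLAS ships separately on Arch-like distros
    return arch_add

print(extra_blas_libs())  # e.g. ['-lcblas'] on Manjaro, [] on Debian/Ubuntu
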
@@ -51,6 +55,7 @@ BONUSCFLAGS2 =
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
CLBLAST_FLAGS = -DGGML_USE_CLBLAST -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include
#let's try enabling everything
CFLAGS += -pthread -s
@@ -152,6 +157,7 @@ NOAVX2_BUILD =
OPENBLAS_BUILD =
OPENBLAS_NOAVX2_BUILD =
CLBLAST_BUILD =
CUBLAS_BUILD =
ifeq ($(OS),Windows_NT)
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
@@ -159,22 +165,28 @@ ifeq ($(OS),Windows_NT)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
else
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
ifdef LLAMA_OPENBLAS
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
endif
ifdef LLAMA_CLBLAST
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL -lcblas -lopenblas -shared -o $@.so $(LDFLAGS)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
endif
ifdef LLAMA_CUBLAS
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ -lcublas_static -lculibos -lcudart_static -lcublasLt_static -lpthread -ldl -L/usr/local/cuda/lib64 -shared -o $@.so $(LDFLAGS)
endif
ifndef LLAMA_OPENBLAS
ifndef LLAMA_CLBLAST
ifndef LLAMA_CUBLAS
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
endif
endif
endif
endif
#
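
On non-Windows systems the link rules now pull in $(ARCH_ADD) instead of a hard-coded -lcblas, so the extra library is only requested where the detection above set it, and the three nested ifndef blocks make the "install a BLAS library" reminder fire only when none of LLAMA_OPENBLAS, LLAMA_CLBLAST or LLAMA_CUBLAS is given. A rough Python model of that selection logic, with illustrative names that are not part of the repo:

# Rough model of the non-Windows build selection above (illustrative only, not repo code).
def openblas_build_cmd(make_vars):
    # make_vars: dict of Make variables, e.g. {"LLAMA_OPENBLAS": "1", "ARCH_ADD": "-lcblas"}
    if "LLAMA_OPENBLAS" in make_vars:
        arch_add = make_vars.get("ARCH_ADD", "")  # "-lcblas" only on Arch-like systems
        return f"$(CXX) $(CXXFLAGS) $^ {arch_add} -lopenblas -shared -o $@.so $(LDFLAGS)"
    if not any(k in make_vars for k in ("LLAMA_OPENBLAS", "LLAMA_CLBLAST", "LLAMA_CUBLAS")):
        return "@echo 'reminder: set LLAMA_OPENBLAS=1 or LLAMA_CLBLAST=1 for faster speeds'"
    return None  # another accelerated build (CLBlast/cuBLAS) was requested instead

print(openblas_build_cmd({"LLAMA_OPENBLAS": "1", "ARCH_ADD": "-lcblas"}))
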
@@ -211,6 +223,9 @@ ggml_openblas_noavx2.o: ggml.c ggml.h
ggml_clblast.o: ggml.c ggml.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CLBLAST_FLAGS) -c $< -o $@
ggml_cublas.o: ggml.c ggml.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) $(CUBLAS_FLAGS) -c $< -o $@
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) $(BONUSCFLAGS1) $(BONUSCFLAGS2) -c $< -o $@
@@ -258,6 +273,9 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_rwkv.o ggml_v1_noavx2.o e
koboldcpp_clblast: ggml_clblast.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CLBLAST_BUILD)
koboldcpp_cublas: ggml_cublas.o ggml_rwkv.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CUBLAS_BUILD)
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
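
The new koboldcpp_cublas target links the CUDA-enabled object the same way the CLBlast target links its own, and per the conditionals above it only gets a real build command on Linux when LLAMA_CUBLAS=1 is passed to make. A small, purely illustrative driver for that invocation (the subprocess wrapper is an assumption, not repo tooling):

# Illustrative driver: invoke the new Makefile target with the CUDA switch enabled.
import subprocess

def build_cublas_lib(jobs=4):
    # LLAMA_CUBLAS=1 enables CUBLAS_BUILD on non-Windows platforms (see the Makefile hunks above).
    cmd = ["make", f"-j{jobs}", "LLAMA_CUBLAS=1", "koboldcpp_cublas"]
    return subprocess.run(cmd, check=False).returncode

if __name__ == "__main__":
    raise SystemExit(build_cublas_lib())
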

View file

@@ -372,7 +372,7 @@ def main(args):
if args.noavx2:
use_noavx2 = True
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas_noavx2)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to use non-avx2 compatibility library without OpenBLAS.")
@@ -380,13 +380,13 @@
use_blas = True
print("Attempting to use non-avx2 compatibility library with OpenBLAS. A compatible libopenblas will be required.")
elif args.useclblast:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_clblast)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "clblast.dll"))):
print("Warning: CLBlast library file not found. Non-BLAS library will be used.")
else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast will be required.")
use_clblast = True
else:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or not (os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) and os.name=='nt'):
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), lib_openblas)) or (os.name=='nt' and not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll"))):
print("Warning: OpenBLAS library file not found. Non-BLAS library will be used.")
elif args.noblas:
print("Attempting to library without OpenBLAS.")

View file

@@ -371,7 +371,7 @@ bool gpt2_eval(
const int n_vocab = hparams.n_vocab;
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
static size_t buf_size = 512u*1024*1024;
static size_t buf_size = 256u*1024*1024;
static void * buf = malloc(buf_size);
if (mem_per_token > 0 && mem_per_token*N*1.6 > buf_size) {

View file

@@ -382,7 +382,7 @@ bool gptj_eval(
const int d_key = n_embd/n_head;
//todo: there is a bug that causes the buffer to oom and I cannot figure it out, hack to increase size for now
static size_t buf_size = 512u*1024*1024;
static size_t buf_size = 256u*1024*1024;
static void * buf = malloc(buf_size);
if (mem_per_token > 0 && mem_per_token*N*1.4 > buf_size) {