diff --git a/.gitignore b/.gitignore
index b44be78fc..4bfed9c80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,7 +34,6 @@ models/*
 /vdot
 /Pipfile
 
-build-info.h
 arm_neon.h
 compile_commands.json
 
diff --git a/Makefile b/Makefile
index 840463749..9fdbfee06 100644
--- a/Makefile
+++ b/Makefile
@@ -266,7 +266,7 @@ gpttype_adapter_clblast.o: gpttype_adapter.cpp
 	$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize-stats perplexity embedding benchmark-matmult save-load-state build-info.h main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe
+	rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_noavx2.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_noavx2.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so gptj.exe gpt2.exe
 
 #
 # Examples
@@ -319,13 +319,8 @@ libllama.so: llama.o ggml.o $(OBJS)
 save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-build-info.h: $(wildcard .git/index) scripts/build-info.sh
-	@sh scripts/build-info.sh > $@.tmp
-	@if ! cmp -s $@.tmp $@; then \
-		mv $@.tmp $@; \
-	else \
-		rm $@.tmp; \
-	fi
+build-info.h:
+	$(DONOTHING)
 
 #
 # Tests
diff --git a/build-info.h b/build-info.h
new file mode 100644
index 000000000..70a4db739
--- /dev/null
+++ b/build-info.h
@@ -0,0 +1,7 @@
+#ifndef BUILD_INFO_H
+#define BUILD_INFO_H
+
+#define BUILD_NUMBER 999
+#define BUILD_COMMIT "KOBOLDCPP"
+
+#endif // BUILD_INFO_H
diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 35f95863f..41002a07b 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -557,7 +557,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nF32 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_ASSERT(false);
         }
 
@@ -656,7 +656,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nF16 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_ASSERT(false);
         }
 
@@ -764,7 +764,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nQF32 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_ASSERT(false);
         }
     }
diff --git a/koboldcpp.py b/koboldcpp.py
index d7d051999..c281d33dc 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -208,7 +208,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.24"
+KcppVersion = "1.25"
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     sys_version = ""
diff --git a/niko.ico b/niko.ico
index f0b191e4d..978ddac65 100644
Binary files a/niko.ico and b/niko.ico differ
diff --git a/otherarch/ggml_v2-opencl.cpp b/otherarch/ggml_v2-opencl.cpp
index 48d2d7f59..0afa8e1aa 100644
--- a/otherarch/ggml_v2-opencl.cpp
+++ b/otherarch/ggml_v2-opencl.cpp
@@ -573,7 +573,7 @@ static void ggml_v2_cl_mul_mat_f32(const ggml_v2_tensor * src0, const ggml_v2_te
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nF32 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_V2_ASSERT(false);
         }
 
@@ -672,7 +672,7 @@ static void ggml_v2_cl_mul_mat_f16(const ggml_v2_tensor * src0, const ggml_v2_te
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nF16 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_V2_ASSERT(false);
         }
 
@@ -780,7 +780,7 @@ static void ggml_v2_cl_mul_mat_q_f32(const ggml_v2_tensor * src0, const ggml_v2_
                                                    &queue, &ev_sgemm);
 
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nQF32 Matmul Failed (%d): You may be out of VRAM. Please check if you have enough.\n",status);
+            printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
             GGML_V2_ASSERT(false);
         }
     }