Merged upstream, fixed OSX compile errors, integrated noavx2 build into main

This commit is contained in:
Concedo 2023-04-12 18:08:55 +08:00
commit 4faae0afa9
15 changed files with 135 additions and 75 deletions

5
.ecrc Normal file
View file

@ -0,0 +1,5 @@
{
"Disable": {
"IndentSize": true
}
}

16
.editorconfig Normal file
View file

@ -0,0 +1,16 @@
# https://EditorConfig.org
# Top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4
[Makefile]
indent_style = tab

View file

@ -22,9 +22,9 @@ Please provide a detailed written description of what you were trying to do, and
# Current Behavior # Current Behavior
Please provide a detailed written description of what `llama.cpp` did, instead. Please provide a detailed written description of what `llama.cpp` did, instead.
# Environment and Context # Environment and Context
Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions. Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions.
@ -133,7 +133,7 @@ llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.
llama_model_load: .......................................................................................... done llama_model_load: .......................................................................................... done
llama_model_load: model size = 4869.09 MB / num tensors = 723 llama_model_load: model size = 4869.09 MB / num tensors = 723
system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
main: prompt: 'Please close your issue when it has been answered.' main: prompt: 'Please close your issue when it has been answered.'
main: number of tokens in prompt = 11 main: number of tokens in prompt = 11
@ -166,14 +166,14 @@ main: total time = 246406.42 ms
Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.': Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.':
3636882.89 msec task-clock # 14.677 CPUs utilized 3636882.89 msec task-clock # 14.677 CPUs utilized
13509 context-switches # 3.714 /sec 13509 context-switches # 3.714 /sec
2436 cpu-migrations # 0.670 /sec 2436 cpu-migrations # 0.670 /sec
10476679 page-faults # 2.881 K/sec 10476679 page-faults # 2.881 K/sec
13133115082869 cycles # 3.611 GHz (16.77%) 13133115082869 cycles # 3.611 GHz (16.77%)
29314462753 stalled-cycles-frontend # 0.22% frontend cycles idle (16.76%) 29314462753 stalled-cycles-frontend # 0.22% frontend cycles idle (16.76%)
10294402631459 stalled-cycles-backend # 78.39% backend cycles idle (16.74%) 10294402631459 stalled-cycles-backend # 78.39% backend cycles idle (16.74%)
23479217109614 instructions # 1.79 insn per cycle 23479217109614 instructions # 1.79 insn per cycle
# 0.44 stalled cycles per insn (16.76%) # 0.44 stalled cycles per insn (16.76%)
2353072268027 branches # 647.002 M/sec (16.77%) 2353072268027 branches # 647.002 M/sec (16.77%)
1998682780 branch-misses # 0.08% of all branches (16.76%) 1998682780 branch-misses # 0.08% of all branches (16.76%)

17
.github/workflows/editorconfig.yml vendored Normal file
View file

@ -0,0 +1,17 @@
name: EditorConfig Checker
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
editorconfig:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: editorconfig-checker/action-editorconfig-checker@main
- run: editorconfig-checker

View file

@ -34,6 +34,7 @@ endif
CFLAGS = -I. -Ofast -DNDEBUG -std=c11 -fPIC CFLAGS = -I. -Ofast -DNDEBUG -std=c11 -fPIC
CXXFLAGS = -I. -I./examples -Ofast -DNDEBUG -std=c++11 -fPIC CXXFLAGS = -I. -I./examples -Ofast -DNDEBUG -std=c++11 -fPIC
LDFLAGS = LDFLAGS =
BONUSCFLAGS =
#lets try enabling everything #lets try enabling everything
CFLAGS += -pthread -s CFLAGS += -pthread -s
@ -71,7 +72,8 @@ endif
# feel free to update the Makefile for your architecture and send a pull request or issue # feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
# Use all CPU extensions that are available: # Use all CPU extensions that are available:
CFLAGS += -mf16c -mfma -mavx2 -mavx -msse3 CFLAGS += -mf16c -mavx -msse3
BONUSCFLAGS += -mfma -mavx2
endif endif
ifneq ($(filter ppc64%,$(UNAME_M)),) ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
@ -122,17 +124,19 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
endif endif
OPENBLAS_BUILD = OPENBLAS_BUILD =
CLBLAST_BUILD =
OPENBLAS_NOAVX2_BUILD =
ifeq ($(OS),Windows_NT) ifeq ($(OS),Windows_NT)
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS) OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas.dll $(LDFLAGS)
else
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use openblas, please install it seperately, then link it manually with LLAMA_OPENBLAS=1. This is just a reminder, not an error.'
endif
CLBLAST_BUILD =
ifeq ($(OS),Windows_NT)
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS) CLBLAST_BUILD = $(CXX) $(CXXFLAGS) ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/OpenCL.lib lib/clblast.lib -shared -o koboldcpp_clblast.dll $(LDFLAGS)
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) ggml_openblas_noavx2.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o lib/libopenblas.lib -shared -o koboldcpp_openblas_noavx2.dll $(LDFLAGS)
else else
CLBLAST_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. If you want to use CLBlast, please install it seperately, then link it manually with LLAMA_CLBLAST=1. This is just a reminder, not an error.' ifndef LLAMA_OPENBLAS
ifndef LLAMA_CLBLAST
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.'
endif
endif
endif endif
# #
@ -150,22 +154,28 @@ $(info I CC: $(CCV))
$(info I CXX: $(CXXV)) $(info I CXX: $(CXXV))
$(info ) $(info )
default: llamalib llamalib_openblas llamalib_clblast default: llamalib llamalib_openblas llamalib_openblas_noavx2 llamalib_clblast
# #
# Build library # Build library
# #
ggml.o: ggml.c ggml.h ggml.o: ggml.c ggml.h
$(CC) $(CFLAGS) -c ggml.c -o ggml.o $(CC) $(CFLAGS) $(BONUSCFLAGS) -c ggml.c -o ggml.o
ggml_openblas.o: ggml.c ggml.h ggml_openblas.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o $(CC) $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas.o
ggml_openblas_noavx2.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -c ggml.c -o ggml_openblas_noavx2.o
ggml_clblast.o: ggml.c ggml.h ggml_clblast.o: ggml.c ggml.h
$(CC) $(CFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o $(CC) $(CFLAGS) $(BONUSCFLAGS) -DGGML_USE_OPENBLAS -DGGML_USE_CLBLAST -c ggml.c -o ggml_clblast.o
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) $(BONUSCFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
ggml_v1_noavx2.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
$(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o $(CC) $(CFLAGS) -c otherarch/ggml_v1.c -o ggml_v1.o
llama.o: llama.cpp llama.h llama_internal.h llama.o: llama.cpp llama.h llama_internal.h
@ -198,6 +208,9 @@ llamalib: ggml.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o llamalib_openblas: ggml_openblas.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(OPENBLAS_BUILD) $(OPENBLAS_BUILD)
llamalib_openblas_noavx2: ggml_openblas_noavx2.o ggml_v1_noavx2.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(OPENBLAS_NOAVX2_BUILD)
llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o llamalib_clblast: ggml_clblast.o ggml_v1.o expose.o common.o llama_adapter.o gpttype_adapter.o
$(CLBLAST_BUILD) $(CLBLAST_BUILD)

View file

@ -19,15 +19,15 @@ GEN_OPTIONS=(--batch_size 1024
--top_p 0.5) --top_p 0.5)
if [ -n "$N_THREAD" ]; then if [ -n "$N_THREAD" ]; then
GEN_OPTIONS+=(--threads "$N_THREAD") GEN_OPTIONS+=(--threads "$N_THREAD")
fi fi
./main "${GEN_OPTIONS[@]}" \ ./main "${GEN_OPTIONS[@]}" \
--model "$MODEL" \ --model "$MODEL" \
--n_predict "$N_PREDICTS" \ --n_predict "$N_PREDICTS" \
--color --interactive \ --color --interactive \
--reverse-prompt "${USER_NAME}:" \ --reverse-prompt "${USER_NAME}:" \
--prompt " --prompt "
This is a transcript of a 1000 page, never ending conversation between ${USER_NAME} and the cute and helpful AI assistant ${AI_NAME}. ${AI_NAME} is a girl who is an AI running on the users computer. This is a transcript of a 1000 page, never ending conversation between ${USER_NAME} and the cute and helpful AI assistant ${AI_NAME}. ${AI_NAME} is a girl who is an AI running on the users computer.
${AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next. ${AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.
${AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help. ${AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.

View file

@ -22,9 +22,9 @@ extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHand
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode); extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID); extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID); extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags, extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags,
const wchar_t * lpWideCharStr, int cchWideChar, const wchar_t * lpWideCharStr, int cchWideChar,
char * lpMultiByteStr, int cbMultiByte, char * lpMultiByteStr, int cbMultiByte,
const char * lpDefaultChar, bool * lpUsedDefaultChar); const char * lpDefaultChar, bool * lpUsedDefaultChar);
#define CP_UTF8 65001 #define CP_UTF8 65001
#endif #endif
@ -328,9 +328,9 @@ void win32_console_init(bool enable_color) {
// Convert a wide Unicode string to an UTF8 string // Convert a wide Unicode string to an UTF8 string
void win32_utf8_encode(const std::wstring & wstr, std::string & str) { void win32_utf8_encode(const std::wstring & wstr, std::string & str) {
int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
std::string strTo(size_needed, 0); std::string strTo(size_needed, 0);
WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
str = strTo; str = strTo;
} }
#endif #endif

View file

@ -1,3 +1,3 @@
# embedding # embedding
TODO TODO

View file

@ -1,3 +1,3 @@
# main # main
TODO TODO

View file

@ -168,7 +168,7 @@ int main(int argc, char ** argv) {
} }
// enable interactive mode if reverse prompt or interactive start is specified // enable interactive mode if reverse prompt or interactive start is specified
if (params.antiprompt.size() != 0 || params.interactive_start) { if (params.antiprompt.size() != 0 || params.interactive_start) {
params.interactive = true; params.interactive = true;
} }

View file

@ -1,3 +1,3 @@
# perplexity # perplexity
TODO TODO

18
ggml.c
View file

@ -127,9 +127,9 @@ typedef void* thread_ret_t;
#ifdef GGML_USE_ACCELERATE #ifdef GGML_USE_ACCELERATE
#include <Accelerate/Accelerate.h> #include <Accelerate/Accelerate.h>
#elif GGML_USE_OPENBLAS
#include <ggml_blas_adapter.c>
#endif #endif
#include <ggml_blas_adapter.c>
#undef MIN #undef MIN
#undef MAX #undef MAX
@ -228,12 +228,12 @@ static inline float fp32_from_bits(uint32_t w) {
} }
static inline uint32_t fp32_to_bits(float f) { static inline uint32_t fp32_to_bits(float f) {
union { union {
float as_value; float as_value;
uint32_t as_bits; uint32_t as_bits;
} fp32; } fp32;
fp32.as_value = f; fp32.as_value = f;
return fp32.as_bits; return fp32.as_bits;
} }
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
@ -1881,7 +1881,7 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
sum1 += x1->d * y1->d * (vgetq_lane_s32(p_1, 0) + vgetq_lane_s32(p_1, 1) + vgetq_lane_s32(p_1, 2) + vgetq_lane_s32(p_1, 3)); sum1 += x1->d * y1->d * (vgetq_lane_s32(p_1, 0) + vgetq_lane_s32(p_1, 1) + vgetq_lane_s32(p_1, 2) + vgetq_lane_s32(p_1, 3));
#endif #endif
#else #else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0ls)); const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0ls));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0ls)); const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0ls));
const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hs), vget_low_s8 (v1_0hs)); const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hs), vget_low_s8 (v1_0hs));

View file

@ -4,6 +4,7 @@
//windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license) //windows binaries for clblast obtained from https://github.com/CNugteren/CLBlast (apache license)
//windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license) //windows binaries for opencl obtained from https://github.com/KhronosGroup/OpenCL-SDK (apache license)
#if GGML_USE_OPENBLAS
#include <cblas.h> #include <cblas.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -104,21 +105,16 @@ static void ggml_cl_sgemm_wrapper(const enum CBLAS_ORDER order, const enum CBLAS
} }
#endif #endif
static void do_blas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc)
{
#if GGML_USE_CLBLAST
ggml_cl_sgemm_wrapper(Order, TransA, TransB,
M, N, K,
alpha, A, lda,
B, ldb,
beta, C, ldc);
#else
cblas_sgemm(Order, TransA, TransB,
M, N, K,
alpha, A, lda,
B, ldb,
beta, C, ldc);
#endif #endif
}
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
#if GGML_USE_CLBLAST
#define do_blas_sgemm(Order, TransA, TransB,M, N, K,alpha, A, lda, B, ldb, beta, C, ldc) ({\
ggml_cl_sgemm_wrapper(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);\
})
#else
#define do_blas_sgemm(Order, TransA, TransB,M, N, K,alpha, A, lda, B, ldb, beta, C, ldc) ({\
cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);\
})
#endif
#endif

View file

@ -36,11 +36,14 @@ class generation_outputs(ctypes.Structure):
handle = None handle = None
use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir. use_blas = False # if true, uses OpenBLAS for acceleration. libopenblas.dll must exist in the same dir.
use_clblast = False #uses CLBlast instead use_clblast = False #uses CLBlast instead
use_noavx2 = False #uses openblas with no avx2 instructions
def init_library(): def init_library():
global handle, use_blas, use_clblast global handle, use_blas, use_clblast, use_noavx2
libname = "" libname = ""
if use_blas: if use_noavx2:
libname = "koboldcpp_openblas_noavx2.dll"
elif use_blas:
libname = "koboldcpp_openblas.dll" libname = "koboldcpp_openblas.dll"
elif use_clblast: elif use_clblast:
libname = "koboldcpp_clblast.dll" libname = "koboldcpp_clblast.dll"
@ -309,7 +312,7 @@ def RunServerMultiThreaded(addr, port, embedded_kailite = None):
sys.exit(0) sys.exit(0)
def main(args): def main(args):
global use_blas, use_clblast global use_blas, use_clblast, use_noavx2
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")): if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.") print("Warning: libopenblas.dll or koboldcpp_openblas.dll not found. Non-BLAS library will be used. Ignore this if you have manually linked with OpenBLAS.")
use_blas = False use_blas = False
@ -322,6 +325,14 @@ def main(args):
else: else:
print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.") print("Attempting to use CLBlast library for faster prompt ingestion. A compatible clblast.dll will be required.")
use_clblast = True use_clblast = True
elif args.noavx2:
if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libopenblas.dll")) or not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "koboldcpp_openblas_noavx2.dll")):
print("Warning: libopenblas.dll or koboldcpp_openblas_noavx2.dll not found. This mode cannot be used.")
elif os.name == 'nt':
print("Attempting to use non-avx2 compatibility openblas library.")
use_noavx2 = True
else:
print("Non-AVX2 compatibility OpenBLAS mode only available on windows. On other OS, please manually rebuild without AVX2 flags.")
elif not args.noblas: elif not args.noblas:
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.") print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas.dll will be required.")
use_blas = True use_blas = True
@ -409,8 +420,10 @@ if __name__ == '__main__':
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads) parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true') parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true') parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
parser.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2) compatgroup = parser.add_mutually_exclusive_group()
compatgroup.add_argument("--noblas", help="Do not use OpenBLAS for accelerated prompt ingestion", action='store_true')
compatgroup.add_argument("--noavx2", help="Do not use AVX2 instructions, a slower compatibility mode for older devices. Does not work with --noblas or --clblast.", action='store_true')
compatgroup.add_argument("--useclblast", help="Use CLBlast instead of OpenBLAS for prompt ingestion. Must specify exactly 2 arguments, platform ID and device ID (e.g. --useclblast 1 0).", type=int, choices=range(0,9), nargs=2)
args = parser.parse_args() args = parser.parse_args()
main(args) main(args)

View file

@ -1 +1 @@
pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe" pyinstaller --noconfirm --onefile --clean --console --icon "./niko.ico" --add-data "./klite.embd;." --add-data "./koboldcpp.dll;." --add-data "./koboldcpp_openblas.dll;." --add-data "./koboldcpp_openblas_noavx2.dll;." --add-data "./libopenblas.dll;." --add-data "./koboldcpp_clblast.dll;." --add-data "./clblast.dll;." "./koboldcpp.py" -n "koboldcpp.exe"