Added prints for stopping sequences, made the Makefile 1% friendlier to Arch Linux users

2023-04-16 20:43:17 +08:00 · 2023-04-16 20:43:17 +08:00 · 6548d3b3fb
commit 6548d3b3fb
parent 525184930d
4 changed files with 15 additions and 6 deletions
--- a/17
+++ b/17
@@ -10,6 +10,10 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif

+ifndef ARCH_LINUX
+ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
+endif
+
 CCV := $(shell $(CC) --version | head -n 1)
 CXXV := $(shell $(CXX) --version | head -n 1)

@@ -40,7 +44,7 @@ BONUSCFLAGS1 =
 BONUSCFLAGS2 =

 #lets try enabling everything
-CFLAGS   += -pthread -s 
+CFLAGS   += -pthread -s
 CXXFLAGS += -pthread -s -Wno-multichar

 # OS specific
@@ -48,6 +52,9 @@ CXXFLAGS += -pthread -s -Wno-multichar
 ifeq ($(UNAME_S),Linux)
 	CFLAGS   += -pthread
 	CXXFLAGS += -pthread
+ifdef ARCH_LINUX
+	LDFLAGS += -lcblas
+endif
 endif
 ifeq ($(UNAME_S),Darwin)
 	CFLAGS   += -pthread
@@ -75,11 +82,11 @@ endif
 #       feel free to update the Makefile for your architecture and send a pull request or issue
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	# Use all CPU extensions that are available:
-	CFLAGS += -mavx 
+	CFLAGS += -mavx
 # old library NEEDS mf16c to work. so we must build with it. new one doesnt
 	ifeq ($(OS),Windows_NT)
-		BONUSCFLAGS1 += -mf16c 
-		BONUSCFLAGS2 += -mavx2 -msse3 -mfma 
+		BONUSCFLAGS1 += -mf16c
+		BONUSCFLAGS2 += -mavx2 -msse3 -mfma
 	else
 # if not on windows, they are clearly building it themselves, so lets just use whatever is supported
 		CFLAGS += -march=native -mtune=native
@@ -135,7 +142,7 @@ endif

 OPENBLAS_BUILD =
 CLBLAST_BUILD =
-NOAVX2_BUILD = 
+NOAVX2_BUILD =
 OPENBLAS_NOAVX2_BUILD =

 ifeq ($(OS),Windows_NT)
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -350,6 +350,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                    if (concat_output.find(matched) != std::string::npos)
                    {
                        remaining_tokens = 0;
+                        printf("\n(Stop sequence triggered)");
                        break;
                    }
                }
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -435,7 +435,7 @@ def main(args):
    RunServerMultiThreaded(args.host, args.port, embedded_kailite)

 if __name__ == '__main__':
-    print("Welcome to KoboldCpp - Version 1.8") # just update version manually
+    print("Welcome to KoboldCpp - Version 1.9") # just update version manually
    parser = argparse.ArgumentParser(description='Kobold llama.cpp server')
    modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
    modelgroup.add_argument("--model", help="Model file to load", nargs="?")
--- a/llama_adapter.cpp
+++ b/llama_adapter.cpp
@@ -246,6 +246,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
                if (concat_output.find(matched) != std::string::npos)
                {
                    remaining_tokens = 0;
+                    printf("\n(Stop sequence triggered)");
                    break;
                }
            }