From 6548d3b3fb0ca79da54acba8ce6a9df4835c19d7 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 16 Apr 2023 20:43:17 +0800
Subject: [PATCH] Added prints for stopping sequences, made makefile 1%
 friendlier to arch linux users

---
 Makefile            | 17 ++++++++++++-----
 gpttype_adapter.cpp |  1 +
 koboldcpp.py        |  2 +-
 llama_adapter.cpp   |  1 +
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index ae239b581..5e9a8d226 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,10 @@ ifndef UNAME_M
 UNAME_M := $(shell uname -m)
 endif
 
+ifndef ARCH_LINUX
+ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
+endif
+
 CCV := $(shell $(CC) --version | head -n 1)
 CXXV := $(shell $(CXX) --version | head -n 1)
 
@@ -40,7 +44,7 @@ BONUSCFLAGS1 =
 BONUSCFLAGS2 =
 
 #lets try enabling everything
-CFLAGS   += -pthread -s 
+CFLAGS   += -pthread -s
 CXXFLAGS += -pthread -s -Wno-multichar
 
 # OS specific
@@ -48,6 +52,9 @@ CXXFLAGS += -pthread -s -Wno-multichar
 ifeq ($(UNAME_S),Linux)
 	CFLAGS   += -pthread
 	CXXFLAGS += -pthread
+ifdef ARCH_LINUX
+	LDFLAGS += -lcblas
+endif
 endif
 ifeq ($(UNAME_S),Darwin)
 	CFLAGS   += -pthread
@@ -75,11 +82,11 @@ endif
 #       feel free to update the Makefile for your architecture and send a pull request or issue
 ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
 	# Use all CPU extensions that are available:
-	CFLAGS += -mavx 
+	CFLAGS += -mavx
 # old library NEEDS mf16c to work. so we must build with it. new one doesnt
 	ifeq ($(OS),Windows_NT)
-		BONUSCFLAGS1 += -mf16c 
-		BONUSCFLAGS2 += -mavx2 -msse3 -mfma 
+		BONUSCFLAGS1 += -mf16c
+		BONUSCFLAGS2 += -mavx2 -msse3 -mfma
 	else
 # if not on windows, they are clearly building it themselves, so lets just use whatever is supported
 		CFLAGS += -march=native -mtune=native
@@ -135,7 +142,7 @@ endif
 
 OPENBLAS_BUILD =
 CLBLAST_BUILD =
-NOAVX2_BUILD = 
+NOAVX2_BUILD =
 OPENBLAS_NOAVX2_BUILD =
 
 ifeq ($(OS),Windows_NT)
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 197828216..1f4537e8c 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -350,6 +350,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                     if (concat_output.find(matched) != std::string::npos)
                     {
                         remaining_tokens = 0;
+                        printf("\n(Stop sequence triggered)");
                         break;
                     }
                 }
diff --git a/koboldcpp.py b/koboldcpp.py
index 737ed406c..0fbdd2e71 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -435,7 +435,7 @@ def main(args):
     RunServerMultiThreaded(args.host, args.port, embedded_kailite)
 
 if __name__ == '__main__':
-    print("Welcome to KoboldCpp - Version 1.8") # just update version manually
+    print("Welcome to KoboldCpp - Version 1.9") # just update version manually
     parser = argparse.ArgumentParser(description='Kobold llama.cpp server')
     modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
     modelgroup.add_argument("--model", help="Model file to load", nargs="?")
diff --git a/llama_adapter.cpp b/llama_adapter.cpp
index 6e0cf8581..861d87422 100644
--- a/llama_adapter.cpp
+++ b/llama_adapter.cpp
@@ -246,6 +246,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
                 if (concat_output.find(matched) != std::string::npos)
                 {
                     remaining_tokens = 0;
+                    printf("\n(Stop sequence triggered)");
                     break;
                 }
             }