From 6548d3b3fb0ca79da54acba8ce6a9df4835c19d7 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 16 Apr 2023 20:43:17 +0800 Subject: [PATCH] Added prints for stopping sequences, made makefile 1% friendlier to arch linux users --- Makefile | 17 ++++++++++++----- gpttype_adapter.cpp | 1 + koboldcpp.py | 2 +- llama_adapter.cpp | 1 + 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index ae239b581..5e9a8d226 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,10 @@ ifndef UNAME_M UNAME_M := $(shell uname -m) endif +ifndef ARCH_LINUX +ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null) +endif + CCV := $(shell $(CC) --version | head -n 1) CXXV := $(shell $(CXX) --version | head -n 1) @@ -40,7 +44,7 @@ BONUSCFLAGS1 = BONUSCFLAGS2 = #lets try enabling everything -CFLAGS += -pthread -s +CFLAGS += -pthread -s CXXFLAGS += -pthread -s -Wno-multichar # OS specific @@ -48,6 +52,9 @@ CXXFLAGS += -pthread -s -Wno-multichar ifeq ($(UNAME_S),Linux) CFLAGS += -pthread CXXFLAGS += -pthread +ifdef ARCH_LINUX + LDFLAGS += -lcblas +endif endif ifeq ($(UNAME_S),Darwin) CFLAGS += -pthread @@ -75,11 +82,11 @@ endif # feel free to update the Makefile for your architecture and send a pull request or issue ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) # Use all CPU extensions that are available: - CFLAGS += -mavx + CFLAGS += -mavx # old library NEEDS mf16c to work. so we must build with it. new one doesnt ifeq ($(OS),Windows_NT) - BONUSCFLAGS1 += -mf16c - BONUSCFLAGS2 += -mavx2 -msse3 -mfma + BONUSCFLAGS1 += -mf16c + BONUSCFLAGS2 += -mavx2 -msse3 -mfma else # if not on windows, they are clearly building it themselves, so lets just use whatever is supported CFLAGS += -march=native -mtune=native @@ -135,7 +142,7 @@ endif OPENBLAS_BUILD = CLBLAST_BUILD = -NOAVX2_BUILD = +NOAVX2_BUILD = OPENBLAS_NOAVX2_BUILD = ifeq ($(OS),Windows_NT) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 197828216..1f4537e8c 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -350,6 +350,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o if (concat_output.find(matched) != std::string::npos) { remaining_tokens = 0; + printf("\n(Stop sequence triggered)"); break; } } diff --git a/koboldcpp.py b/koboldcpp.py index 737ed406c..0fbdd2e71 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -435,7 +435,7 @@ def main(args): RunServerMultiThreaded(args.host, args.port, embedded_kailite) if __name__ == '__main__': - print("Welcome to KoboldCpp - Version 1.8") # just update version manually + print("Welcome to KoboldCpp - Version 1.9") # just update version manually parser = argparse.ArgumentParser(description='Kobold llama.cpp server') modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args modelgroup.add_argument("--model", help="Model file to load", nargs="?") diff --git a/llama_adapter.cpp b/llama_adapter.cpp index 6e0cf8581..861d87422 100644 --- a/llama_adapter.cpp +++ b/llama_adapter.cpp @@ -246,6 +246,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out if (concat_output.find(matched) != std::string::npos) { remaining_tokens = 0; + printf("\n(Stop sequence triggered)"); break; } }