Added prints for stop sequences, made the Makefile 1% friendlier to Arch Linux users
This commit is contained in:
parent
525184930d
commit
6548d3b3fb
4 changed files with 15 additions and 6 deletions
17
Makefile
17
Makefile
|
@ -10,6 +10,10 @@ ifndef UNAME_M
|
||||||
UNAME_M := $(shell uname -m)
|
UNAME_M := $(shell uname -m)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef ARCH_LINUX
|
||||||
|
ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
|
||||||
|
endif
|
||||||
|
|
||||||
CCV := $(shell $(CC) --version | head -n 1)
|
CCV := $(shell $(CC) --version | head -n 1)
|
||||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||||
|
|
||||||
|
@ -40,7 +44,7 @@ BONUSCFLAGS1 =
|
||||||
BONUSCFLAGS2 =
|
BONUSCFLAGS2 =
|
||||||
|
|
||||||
#lets try enabling everything
|
#lets try enabling everything
|
||||||
CFLAGS += -pthread -s
|
CFLAGS += -pthread -s
|
||||||
CXXFLAGS += -pthread -s -Wno-multichar
|
CXXFLAGS += -pthread -s -Wno-multichar
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
|
@ -48,6 +52,9 @@ CXXFLAGS += -pthread -s -Wno-multichar
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
CFLAGS += -pthread
|
CFLAGS += -pthread
|
||||||
CXXFLAGS += -pthread
|
CXXFLAGS += -pthread
|
||||||
|
ifdef ARCH_LINUX
|
||||||
|
LDFLAGS += -lcblas
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
CFLAGS += -pthread
|
CFLAGS += -pthread
|
||||||
|
@ -75,11 +82,11 @@ endif
|
||||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
||||||
# Use all CPU extensions that are available:
|
# Use all CPU extensions that are available:
|
||||||
CFLAGS += -mavx
|
CFLAGS += -mavx
|
||||||
# old library NEEDS mf16c to work. so we must build with it. new one doesnt
|
# old library NEEDS mf16c to work. so we must build with it. new one doesnt
|
||||||
ifeq ($(OS),Windows_NT)
|
ifeq ($(OS),Windows_NT)
|
||||||
BONUSCFLAGS1 += -mf16c
|
BONUSCFLAGS1 += -mf16c
|
||||||
BONUSCFLAGS2 += -mavx2 -msse3 -mfma
|
BONUSCFLAGS2 += -mavx2 -msse3 -mfma
|
||||||
else
|
else
|
||||||
# if not on windows, they are clearly building it themselves, so lets just use whatever is supported
|
# if not on windows, they are clearly building it themselves, so lets just use whatever is supported
|
||||||
CFLAGS += -march=native -mtune=native
|
CFLAGS += -march=native -mtune=native
|
||||||
|
@ -135,7 +142,7 @@ endif
|
||||||
|
|
||||||
OPENBLAS_BUILD =
|
OPENBLAS_BUILD =
|
||||||
CLBLAST_BUILD =
|
CLBLAST_BUILD =
|
||||||
NOAVX2_BUILD =
|
NOAVX2_BUILD =
|
||||||
OPENBLAS_NOAVX2_BUILD =
|
OPENBLAS_NOAVX2_BUILD =
|
||||||
|
|
||||||
ifeq ($(OS),Windows_NT)
|
ifeq ($(OS),Windows_NT)
|
||||||
|
|
|
@ -350,6 +350,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
if (concat_output.find(matched) != std::string::npos)
|
if (concat_output.find(matched) != std::string::npos)
|
||||||
{
|
{
|
||||||
remaining_tokens = 0;
|
remaining_tokens = 0;
|
||||||
|
printf("\n(Stop sequence triggered)");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -435,7 +435,7 @@ def main(args):
|
||||||
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
|
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("Welcome to KoboldCpp - Version 1.8") # just update version manually
|
print("Welcome to KoboldCpp - Version 1.9") # just update version manually
|
||||||
parser = argparse.ArgumentParser(description='Kobold llama.cpp server')
|
parser = argparse.ArgumentParser(description='Kobold llama.cpp server')
|
||||||
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
||||||
modelgroup.add_argument("--model", help="Model file to load", nargs="?")
|
modelgroup.add_argument("--model", help="Model file to load", nargs="?")
|
||||||
|
|
|
@ -246,6 +246,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
|
||||||
if (concat_output.find(matched) != std::string::npos)
|
if (concat_output.find(matched) != std::string::npos)
|
||||||
{
|
{
|
||||||
remaining_tokens = 0;
|
remaining_tokens = 0;
|
||||||
|
printf("\n(Stop sequence triggered)");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue