Added prints for stopping sequences, made the Makefile 1% friendlier to Arch Linux users
This commit is contained in:
parent
525184930d
commit
6548d3b3fb
4 changed files with 15 additions and 6 deletions
17
Makefile
17
Makefile
|
@ -10,6 +10,10 @@ ifndef UNAME_M
|
|||
UNAME_M := $(shell uname -m)
|
||||
endif
|
||||
|
||||
ifndef ARCH_LINUX
|
||||
ARCH_LINUX := $(shell grep "Arch Linux" /etc/os-release 2>/dev/null)
|
||||
endif
|
||||
|
||||
CCV := $(shell $(CC) --version | head -n 1)
|
||||
CXXV := $(shell $(CXX) --version | head -n 1)
|
||||
|
||||
|
@ -40,7 +44,7 @@ BONUSCFLAGS1 =
|
|||
BONUSCFLAGS2 =
|
||||
|
||||
# let's try enabling everything
|
||||
CFLAGS += -pthread -s
|
||||
CFLAGS += -pthread -s
|
||||
CXXFLAGS += -pthread -s -Wno-multichar
|
||||
|
||||
# OS specific
|
||||
|
@ -48,6 +52,9 @@ CXXFLAGS += -pthread -s -Wno-multichar
|
|||
ifeq ($(UNAME_S),Linux)
|
||||
CFLAGS += -pthread
|
||||
CXXFLAGS += -pthread
|
||||
ifdef ARCH_LINUX
|
||||
LDFLAGS += -lcblas
|
||||
endif
|
||||
endif
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CFLAGS += -pthread
|
||||
|
@ -75,11 +82,11 @@ endif
|
|||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
|
||||
# Use all CPU extensions that are available:
|
||||
CFLAGS += -mavx
|
||||
CFLAGS += -mavx
|
||||
# the old library NEEDS -mf16c to work, so we must build with it; the new one doesn't
|
||||
ifeq ($(OS),Windows_NT)
|
||||
BONUSCFLAGS1 += -mf16c
|
||||
BONUSCFLAGS2 += -mavx2 -msse3 -mfma
|
||||
BONUSCFLAGS1 += -mf16c
|
||||
BONUSCFLAGS2 += -mavx2 -msse3 -mfma
|
||||
else
|
||||
# if not on Windows, they are clearly building it themselves, so let's just use whatever is supported
|
||||
CFLAGS += -march=native -mtune=native
|
||||
|
@ -135,7 +142,7 @@ endif
|
|||
|
||||
OPENBLAS_BUILD =
|
||||
CLBLAST_BUILD =
|
||||
NOAVX2_BUILD =
|
||||
NOAVX2_BUILD =
|
||||
OPENBLAS_NOAVX2_BUILD =
|
||||
|
||||
ifeq ($(OS),Windows_NT)
|
||||
|
|
|
@ -350,6 +350,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
if (concat_output.find(matched) != std::string::npos)
|
||||
{
|
||||
remaining_tokens = 0;
|
||||
printf("\n(Stop sequence triggered)");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -435,7 +435,7 @@ def main(args):
|
|||
RunServerMultiThreaded(args.host, args.port, embedded_kailite)
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("Welcome to KoboldCpp - Version 1.8") # just update version manually
|
||||
print("Welcome to KoboldCpp - Version 1.9") # just update version manually
|
||||
parser = argparse.ArgumentParser(description='Kobold llama.cpp server')
|
||||
modelgroup = parser.add_mutually_exclusive_group() #we want to be backwards compatible with the unnamed positional args
|
||||
modelgroup.add_argument("--model", help="Model file to load", nargs="?")
|
||||
|
|
|
@ -246,6 +246,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
|
|||
if (concat_output.find(matched) != std::string::npos)
|
||||
{
|
||||
remaining_tokens = 0;
|
||||
printf("\n(Stop sequence triggered)");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue