improved makefile, allowing building without k quants
This commit is contained in:
parent
17ee719c56
commit
6c2134a860
2 changed files with 53 additions and 14 deletions
55
Makefile
55
Makefile
|
@ -39,10 +39,15 @@ endif
|
||||||
#
|
#
|
||||||
|
|
||||||
# keep standard at C11 and C++11
|
# keep standard at C11 and C++11
|
||||||
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||||
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||||
LDFLAGS =
|
LDFLAGS =
|
||||||
|
|
||||||
|
ifndef LLAMA_NO_K_QUANTS
|
||||||
|
CFLAGS += -DGGML_USE_K_QUANTS
|
||||||
|
CXXFLAGS += -DGGML_USE_K_QUANTS
|
||||||
|
endif
|
||||||
|
|
||||||
# these are used on windows, to build some libraries with extra old device compatibility
|
# these are used on windows, to build some libraries with extra old device compatibility
|
||||||
SIMPLECFLAGS =
|
SIMPLECFLAGS =
|
||||||
FULLCFLAGS =
|
FULLCFLAGS =
|
||||||
|
@ -285,19 +290,17 @@ ifeq ($(OS),Windows_NT)
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
|
||||||
ifdef LLAMA_OPENBLAS
|
ifdef LLAMA_OPENBLAS
|
||||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
ifeq ($(UNAME_S),Darwin)
|
ifeq ($(UNAME_S),Darwin)
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
else
|
else
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef LLAMA_CUBLAS
|
ifdef LLAMA_CUBLAS
|
||||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
|
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
|
@ -351,12 +354,18 @@ ggml_cublas.o: ggml.c ggml.h ggml-cuda.h k_quants.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||||
|
|
||||||
#quants K
|
#quants K
|
||||||
|
ifndef LLAMA_NO_K_QUANTS
|
||||||
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||||
k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||||
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||||
|
else
|
||||||
|
k_quants.o:
|
||||||
|
k_quants_noavx2.o:
|
||||||
|
k_quants_failsafe.o:
|
||||||
|
endif # LLAMA_NO_K_QUANTS
|
||||||
|
|
||||||
#there's no intrinsics or special gpu ops used here, so we can have a universal object
|
#there's no intrinsics or special gpu ops used here, so we can have a universal object
|
||||||
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||||
|
@ -425,22 +434,54 @@ main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o
|
||||||
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
|
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
|
||||||
#generated libraries
|
#generated libraries
|
||||||
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||||
$(DEFAULT_BUILD)
|
$(DEFAULT_BUILD)
|
||||||
|
|
||||||
|
ifdef OPENBLAS_BUILD
|
||||||
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||||
$(OPENBLAS_BUILD)
|
$(OPENBLAS_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_openblas:
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef FAILSAFE_BUILD
|
||||||
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o grammar-parser.o $(OBJS)
|
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||||
$(FAILSAFE_BUILD)
|
$(FAILSAFE_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_failsafe:
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef NOAVX2_BUILD
|
||||||
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o grammar-parser.o $(OBJS)
|
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||||
$(NOAVX2_BUILD)
|
$(NOAVX2_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_noavx2:
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef CLBLAST_BUILD
|
||||||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_clblast:
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef CUBLAS_BUILD
|
||||||
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS)
|
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS)
|
||||||
$(CUBLAS_BUILD)
|
$(CUBLAS_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_cublas:
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef HIPBLAS_BUILD
|
||||||
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS)
|
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS)
|
||||||
$(HIPBLAS_BUILD)
|
$(HIPBLAS_BUILD)
|
||||||
|
else
|
||||||
|
koboldcpp_hipblas:
|
||||||
|
endif
|
||||||
|
|
||||||
|
# tools
|
||||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
|
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
|
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
|
||||||
|
|
12
koboldcpp.py
12
koboldcpp.py
|
@ -576,9 +576,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
global modelbusy, requestsinqueue
|
global modelbusy, requestsinqueue
|
||||||
content_length = int(self.headers['Content-Length'])
|
content_length = int(self.headers['Content-Length'])
|
||||||
body = self.rfile.read(content_length)
|
body = self.rfile.read(content_length)
|
||||||
basic_api_flag = False
|
|
||||||
kai_api_flag = False
|
|
||||||
kai_sse_stream_flag = False
|
|
||||||
self.path = self.path.rstrip('/')
|
self.path = self.path.rstrip('/')
|
||||||
|
|
||||||
if self.path.endswith(('/api/extra/tokencount')):
|
if self.path.endswith(('/api/extra/tokencount')):
|
||||||
|
@ -634,6 +631,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
|
requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
basic_api_flag = False
|
||||||
|
kai_api_flag = False
|
||||||
|
kai_sse_stream_flag = False
|
||||||
if self.path.endswith('/request'):
|
if self.path.endswith('/request'):
|
||||||
basic_api_flag = True
|
basic_api_flag = True
|
||||||
|
|
||||||
|
@ -1670,7 +1670,7 @@ def run_horde_worker(args, api_key, worker_name):
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
else:
|
else:
|
||||||
print_with_time("Horde Worker Shutdown - Server Closing.")
|
print_with_time("Horde Worker Shutdown - Server Closing.")
|
||||||
time.sleep(2)
|
time.sleep(3)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
def unload_libs():
|
def unload_libs():
|
||||||
|
@ -1752,7 +1752,7 @@ def main(launch_args,start_server=True):
|
||||||
setattr(args, key, value)
|
setattr(args, key, value)
|
||||||
else:
|
else:
|
||||||
print("Specified kcpp config file invalid or not found.")
|
print("Specified kcpp config file invalid or not found.")
|
||||||
time.sleep(2)
|
time.sleep(3)
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
if not args.model_param:
|
if not args.model_param:
|
||||||
args.model_param = args.model
|
args.model_param = args.model
|
||||||
|
@ -1896,8 +1896,6 @@ def main(launch_args,start_server=True):
|
||||||
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
||||||
else:
|
else:
|
||||||
print(f"Server was not started, main function complete. Idling.")
|
print(f"Server was not started, main function complete. Idling.")
|
||||||
# while True:
|
|
||||||
# time.sleep(5)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue