improved makefile, allowing building without k quants
This commit is contained in:
parent
17ee719c56
commit
6c2134a860
2 changed files with 53 additions and 14 deletions
55
Makefile
55
Makefile
|
@ -39,10 +39,15 @@ endif
|
|||
#
|
||||
|
||||
# keep standard at C11 and C++11
|
||||
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DGGML_USE_K_QUANTS -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||
CFLAGS = -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||
CXXFLAGS = -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -Ofast -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE
|
||||
LDFLAGS =
|
||||
|
||||
ifndef LLAMA_NO_K_QUANTS
|
||||
CFLAGS += -DGGML_USE_K_QUANTS
|
||||
CXXFLAGS += -DGGML_USE_K_QUANTS
|
||||
endif
|
||||
|
||||
# these are used on windows, to build some libraries with extra old device compatibility
|
||||
SIMPLECFLAGS =
|
||||
FULLCFLAGS =
|
||||
|
@ -285,19 +290,17 @@ ifeq ($(OS),Windows_NT)
|
|||
endif
|
||||
else
|
||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||
|
||||
ifdef LLAMA_OPENBLAS
|
||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
endif
|
||||
ifdef LLAMA_CLBLAST
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
else
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef LLAMA_CUBLAS
|
||||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o $@.so $(CUBLASLD_FLAGS) $(LDFLAGS)
|
||||
endif
|
||||
|
@ -351,12 +354,18 @@ ggml_cublas.o: ggml.c ggml.h ggml-cuda.h k_quants.h
|
|||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) $(HIPFLAGS) -c $< -o $@
|
||||
|
||||
#quants K
|
||||
ifndef LLAMA_NO_K_QUANTS
|
||||
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||
$(CC) $(CFLAGS) $(FULLCFLAGS) -c $< -o $@
|
||||
k_quants_noavx2.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) -c $< -o $@
|
||||
k_quants_failsafe.o: k_quants.c k_quants.h ggml.h ggml-cuda.h
|
||||
$(CC) $(CFLAGS) $(NONECFLAGS) -c $< -o $@
|
||||
else
|
||||
k_quants.o:
|
||||
k_quants_noavx2.o:
|
||||
k_quants_failsafe.o:
|
||||
endif # LLAMA_NO_K_QUANTS
|
||||
|
||||
#there's no intrinsics or special gpu ops used here, so we can have a universal object
|
||||
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||
|
@ -425,22 +434,54 @@ main: examples/main/main.cpp build-info.h ggml.o k_quants.o ggml-alloc.o llama.o
|
|||
gguf: examples/gguf/gguf.cpp build-info.h ggml.o llama.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||
|
||||
|
||||
#generated libraries
|
||||
koboldcpp_default: ggml.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||
$(DEFAULT_BUILD)
|
||||
|
||||
ifdef OPENBLAS_BUILD
|
||||
koboldcpp_openblas: ggml_openblas.o ggml_v2_openblas.o ggml_v1.o expose.o common.o gpttype_adapter.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||
$(OPENBLAS_BUILD)
|
||||
else
|
||||
koboldcpp_openblas:
|
||||
endif
|
||||
|
||||
ifdef FAILSAFE_BUILD
|
||||
koboldcpp_failsafe: ggml_failsafe.o ggml_v2_failsafe.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_failsafe.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||
$(FAILSAFE_BUILD)
|
||||
else
|
||||
koboldcpp_failsafe:
|
||||
endif
|
||||
|
||||
ifdef NOAVX2_BUILD
|
||||
koboldcpp_noavx2: ggml_noavx2.o ggml_v2_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_failsafe.o k_quants_noavx2.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||
$(NOAVX2_BUILD)
|
||||
else
|
||||
koboldcpp_noavx2:
|
||||
endif
|
||||
|
||||
ifdef CLBLAST_BUILD
|
||||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o ggml-alloc.o grammar-parser.o $(OBJS)
|
||||
$(CLBLAST_BUILD)
|
||||
else
|
||||
koboldcpp_clblast:
|
||||
endif
|
||||
|
||||
ifdef CUBLAS_BUILD
|
||||
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(CUBLAS_OBJS) $(OBJS)
|
||||
$(CUBLAS_BUILD)
|
||||
else
|
||||
koboldcpp_cublas:
|
||||
endif
|
||||
|
||||
ifdef HIPBLAS_BUILD
|
||||
koboldcpp_hipblas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o ggml-alloc.o grammar-parser.o $(HIP_OBJS) $(OBJS)
|
||||
$(HIPBLAS_BUILD)
|
||||
else
|
||||
koboldcpp_hipblas:
|
||||
endif
|
||||
|
||||
# tools
|
||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o ggml-alloc.o
|
||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||
quantize_gptj: ggml.o llama.o k_quants.o ggml-alloc.o otherarch/tools/gptj_quantize.cpp otherarch/tools/common-ggml.cpp
|
||||
|
|
12
koboldcpp.py
12
koboldcpp.py
|
@ -576,9 +576,6 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
global modelbusy, requestsinqueue
|
||||
content_length = int(self.headers['Content-Length'])
|
||||
body = self.rfile.read(content_length)
|
||||
basic_api_flag = False
|
||||
kai_api_flag = False
|
||||
kai_sse_stream_flag = False
|
||||
self.path = self.path.rstrip('/')
|
||||
|
||||
if self.path.endswith(('/api/extra/tokencount')):
|
||||
|
@ -634,6 +631,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
requestsinqueue = (requestsinqueue - 1) if requestsinqueue>0 else 0
|
||||
|
||||
try:
|
||||
basic_api_flag = False
|
||||
kai_api_flag = False
|
||||
kai_sse_stream_flag = False
|
||||
if self.path.endswith('/request'):
|
||||
basic_api_flag = True
|
||||
|
||||
|
@ -1670,7 +1670,7 @@ def run_horde_worker(args, api_key, worker_name):
|
|||
time.sleep(3)
|
||||
else:
|
||||
print_with_time("Horde Worker Shutdown - Server Closing.")
|
||||
time.sleep(2)
|
||||
time.sleep(3)
|
||||
sys.exit(2)
|
||||
|
||||
def unload_libs():
|
||||
|
@ -1752,7 +1752,7 @@ def main(launch_args,start_server=True):
|
|||
setattr(args, key, value)
|
||||
else:
|
||||
print("Specified kcpp config file invalid or not found.")
|
||||
time.sleep(2)
|
||||
time.sleep(3)
|
||||
sys.exit(2)
|
||||
if not args.model_param:
|
||||
args.model_param = args.model
|
||||
|
@ -1896,8 +1896,6 @@ def main(launch_args,start_server=True):
|
|||
asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite))
|
||||
else:
|
||||
print(f"Server was not started, main function complete. Idling.")
|
||||
# while True:
|
||||
# time.sleep(5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
print("***\nWelcome to KoboldCpp - Version " + KcppVersion) # just update version manually
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue