Added a dropdown for the BLAS batch size (blasbatchsize). Added the capability to build an AVX (noavx2) CLBlast variant, koboldcpp_clblast_noavx2, but it is not part of the default build for now.
This commit is contained in:
parent
54dc75ce73
commit
79df932d0a
2 changed files with 63 additions and 33 deletions
12
Makefile
12
Makefile
|
@ -1,5 +1,4 @@
|
||||||
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
|
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast
|
||||||
simple: koboldcpp koboldcpp_failsafe
|
|
||||||
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
|
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt
|
||||||
dev: koboldcpp_openblas
|
dev: koboldcpp_openblas
|
||||||
dev2: koboldcpp_clblast
|
dev2: koboldcpp_clblast
|
||||||
|
@ -185,6 +184,7 @@ FAILSAFE_BUILD =
|
||||||
OPENBLAS_BUILD =
|
OPENBLAS_BUILD =
|
||||||
OPENBLAS_NOAVX2_BUILD =
|
OPENBLAS_NOAVX2_BUILD =
|
||||||
CLBLAST_BUILD =
|
CLBLAST_BUILD =
|
||||||
|
CLBLAST_NOAVX2_BUILD =
|
||||||
|
|
||||||
ifeq ($(OS),Windows_NT)
|
ifeq ($(OS),Windows_NT)
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.dll $(LDFLAGS)
|
||||||
|
@ -192,6 +192,7 @@ ifeq ($(OS),Windows_NT)
|
||||||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
|
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o $@.dll $(LDFLAGS)
|
||||||
else
|
else
|
||||||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o $@.so $(LDFLAGS)
|
||||||
|
@ -201,6 +202,7 @@ else
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
|
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef LLAMA_OPENBLAS
|
ifndef LLAMA_OPENBLAS
|
||||||
|
@ -239,6 +241,8 @@ ggml_openblas_noavx2.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
||||||
ggml_clblast.o: ggml.c ggml.h
|
ggml_clblast.o: ggml.c ggml.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml_clblast_noavx2.o: ggml.c ggml.h
|
||||||
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#version 2 libs
|
#version 2 libs
|
||||||
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
|
@ -251,6 +255,8 @@ ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@
|
||||||
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h
|
||||||
|
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
#extreme old version compat
|
#extreme old version compat
|
||||||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h
|
||||||
|
@ -281,7 +287,7 @@ gpttype_adapter_clblast.o: gpttype_adapter.cpp
|
||||||
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so
|
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so
|
||||||
|
|
||||||
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
@ -299,6 +305,8 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml
|
||||||
$(OPENBLAS_NOAVX2_BUILD)
|
$(OPENBLAS_NOAVX2_BUILD)
|
||||||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o
|
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o
|
||||||
$(CLBLAST_BUILD)
|
$(CLBLAST_BUILD)
|
||||||
|
koboldcpp_clblast_noavx2: ggml_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o
|
||||||
|
$(CLBLAST_NOAVX2_BUILD)
|
||||||
|
|
||||||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
|
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
|
80
koboldcpp.py
80
koboldcpp.py
|
@ -455,37 +455,43 @@ def show_gui():
|
||||||
tk.Label(root, text = "(Note: KoboldCpp only works with GGML model formats!)",
|
tk.Label(root, text = "(Note: KoboldCpp only works with GGML model formats!)",
|
||||||
font = ("Arial", 9)).grid(row=1,column=0)
|
font = ("Arial", 9)).grid(row=1,column=0)
|
||||||
|
|
||||||
|
blasbatchopts = ["Don't Batch BLAS","BLAS = 32","BLAS = 64","BLAS = 128","BLAS = 256","BLAS = 512","BLAS = 1024"]
|
||||||
|
blaschoice = tk.StringVar()
|
||||||
|
blaschoice.set("BLAS = 512")
|
||||||
|
|
||||||
opts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
runopts = ["Use OpenBLAS","Use CLBLast GPU #1","Use CLBLast GPU #2","Use CLBLast GPU #3","Use No BLAS","Use OpenBLAS (Old CPU, noavx2)","Failsafe Mode (Old CPU, noavx)"]
|
||||||
runchoice = tk.StringVar()
|
runchoice = tk.StringVar()
|
||||||
runchoice.set("Use OpenBLAS")
|
runchoice.set("Use OpenBLAS")
|
||||||
|
|
||||||
def onDropdownChange(event):
|
def onDropdownChange(event):
|
||||||
sel = runchoice.get()
|
sel = runchoice.get()
|
||||||
if sel==opts[1] or sel==opts[2] or sel==opts[3]:
|
if sel==runopts[1] or sel==runopts[2] or sel==runopts[3]:
|
||||||
frm1.grid(row=4,column=0,pady=4)
|
frameC.grid(row=4,column=0,pady=4)
|
||||||
else:
|
else:
|
||||||
frm1.grid_forget()
|
frameC.grid_forget()
|
||||||
pass
|
|
||||||
tk.OptionMenu( root , runchoice , command = onDropdownChange ,*opts ).grid(row=2,column=0)
|
|
||||||
|
|
||||||
|
frameA = tk.Frame(root)
|
||||||
|
tk.OptionMenu( frameA , runchoice , command = onDropdownChange ,*runopts ).grid(row=0,column=0)
|
||||||
|
tk.OptionMenu( frameA , blaschoice ,*blasbatchopts ).grid(row=0,column=1)
|
||||||
|
frameA.grid(row=2,column=0)
|
||||||
|
|
||||||
frm2 = tk.Frame(root)
|
frameB = tk.Frame(root)
|
||||||
threads_var=tk.StringVar()
|
threads_var=tk.StringVar()
|
||||||
threads_var.set(str(default_threads))
|
threads_var.set(str(default_threads))
|
||||||
threads_lbl = tk.Label(frm2, text = 'Threads: ', font=('calibre',10, 'bold'))
|
threads_lbl = tk.Label(frameB, text = 'Threads: ', font=('calibre',10, 'bold'))
|
||||||
threads_input = tk.Entry(frm2,textvariable = threads_var, font=('calibre',10,'normal'))
|
threads_input = tk.Entry(frameB,textvariable = threads_var, font=('calibre',10,'normal'))
|
||||||
threads_lbl.grid(row=0,column=0)
|
threads_lbl.grid(row=0,column=0)
|
||||||
threads_input.grid(row=0,column=1)
|
threads_input.grid(row=0,column=1)
|
||||||
frm2.grid(row=3,column=0,pady=4)
|
frameB.grid(row=3,column=0,pady=4)
|
||||||
|
|
||||||
frm1 = tk.Frame(root)
|
frameC = tk.Frame(root)
|
||||||
gpu_layers_var=tk.StringVar()
|
gpu_layers_var=tk.StringVar()
|
||||||
gpu_layers_var.set("0")
|
gpu_layers_var.set("0")
|
||||||
gpu_lbl = tk.Label(frm1, text = 'GPU Layers (CLBlast only): ', font=('calibre',10, 'bold'))
|
gpu_lbl = tk.Label(frameC, text = 'GPU Layers (CLBlast only): ', font=('calibre',10, 'bold'))
|
||||||
gpu_layers_input = tk.Entry(frm1,textvariable = gpu_layers_var, font=('calibre',10,'normal'))
|
gpu_layers_input = tk.Entry(frameC,textvariable = gpu_layers_var, font=('calibre',10,'normal'))
|
||||||
gpu_lbl.grid(row=0,column=0)
|
gpu_lbl.grid(row=0,column=0)
|
||||||
gpu_layers_input.grid(row=0,column=1)
|
gpu_layers_input.grid(row=0,column=1)
|
||||||
frm1.grid(row=4,column=0,pady=4)
|
frameC.grid(row=4,column=0,pady=4)
|
||||||
onDropdownChange(None)
|
onDropdownChange(None)
|
||||||
|
|
||||||
stream = tk.IntVar()
|
stream = tk.IntVar()
|
||||||
|
@ -494,15 +500,15 @@ def show_gui():
|
||||||
unbantokens = tk.IntVar()
|
unbantokens = tk.IntVar()
|
||||||
highpriority = tk.IntVar()
|
highpriority = tk.IntVar()
|
||||||
disablemmap = tk.IntVar()
|
disablemmap = tk.IntVar()
|
||||||
frm3 = tk.Frame(root)
|
|
||||||
tk.Checkbutton(frm3, text='Streaming Mode',variable=stream, onvalue=1, offvalue=0).grid(row=0,column=0)
|
|
||||||
tk.Checkbutton(frm3, text='Use SmartContext',variable=smartcontext, onvalue=1, offvalue=0).grid(row=0,column=1)
|
|
||||||
tk.Checkbutton(frm3, text='High Priority',variable=highpriority, onvalue=1, offvalue=0).grid(row=1,column=0)
|
|
||||||
tk.Checkbutton(frm3, text='Disable MMAP',variable=disablemmap, onvalue=1, offvalue=0).grid(row=1,column=1)
|
|
||||||
tk.Checkbutton(frm3, text='Unban Tokens',variable=unbantokens, onvalue=1, offvalue=0).grid(row=2,column=0)
|
|
||||||
tk.Checkbutton(frm3, text='Launch Browser',variable=launchbrowser, onvalue=1, offvalue=0).grid(row=2,column=1)
|
|
||||||
|
|
||||||
frm3.grid(row=5,column=0,pady=4)
|
frameD = tk.Frame(root)
|
||||||
|
tk.Checkbutton(frameD, text='Streaming Mode',variable=stream, onvalue=1, offvalue=0).grid(row=0,column=0)
|
||||||
|
tk.Checkbutton(frameD, text='Use SmartContext',variable=smartcontext, onvalue=1, offvalue=0).grid(row=0,column=1)
|
||||||
|
tk.Checkbutton(frameD, text='High Priority',variable=highpriority, onvalue=1, offvalue=0).grid(row=1,column=0)
|
||||||
|
tk.Checkbutton(frameD, text='Disable MMAP',variable=disablemmap, onvalue=1, offvalue=0).grid(row=1,column=1)
|
||||||
|
tk.Checkbutton(frameD, text='Unban Tokens',variable=unbantokens, onvalue=1, offvalue=0).grid(row=2,column=0)
|
||||||
|
tk.Checkbutton(frameD, text='Launch Browser',variable=launchbrowser, onvalue=1, offvalue=0).grid(row=2,column=1)
|
||||||
|
frameD.grid(row=5,column=0,pady=4)
|
||||||
|
|
||||||
# Create button, it will change label text
|
# Create button, it will change label text
|
||||||
tk.Button( root , text = "Launch", font = ("Impact", 18), bg='#54FA9B', command = guilaunch ).grid(row=6,column=0)
|
tk.Button( root , text = "Launch", font = ("Impact", 18), bg='#54FA9B', command = guilaunch ).grid(row=6,column=0)
|
||||||
|
@ -526,24 +532,40 @@ def show_gui():
|
||||||
args.unbantokens = (unbantokens.get()==1)
|
args.unbantokens = (unbantokens.get()==1)
|
||||||
args.highpriority = (highpriority.get()==1)
|
args.highpriority = (highpriority.get()==1)
|
||||||
args.nommap = (disablemmap.get()==1)
|
args.nommap = (disablemmap.get()==1)
|
||||||
selchoice = runchoice.get()
|
selrunchoice = runchoice.get()
|
||||||
|
selblaschoice = blaschoice.get()
|
||||||
|
|
||||||
if selchoice==opts[1]:
|
if selrunchoice==runopts[1]:
|
||||||
args.useclblast = [0,0]
|
args.useclblast = [0,0]
|
||||||
if selchoice==opts[2]:
|
if selrunchoice==runopts[2]:
|
||||||
args.useclblast = [1,0]
|
args.useclblast = [1,0]
|
||||||
if selchoice==opts[3]:
|
if selrunchoice==runopts[3]:
|
||||||
args.useclblast = [0,1]
|
args.useclblast = [0,1]
|
||||||
if selchoice==opts[4]:
|
if selrunchoice==runopts[4]:
|
||||||
args.noblas = True
|
args.noblas = True
|
||||||
if selchoice==opts[5]:
|
if selrunchoice==runopts[5]:
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
if selchoice==opts[6]:
|
if selrunchoice==runopts[6]:
|
||||||
args.noavx2 = True
|
args.noavx2 = True
|
||||||
args.noblas = True
|
args.noblas = True
|
||||||
args.nommap = True
|
args.nommap = True
|
||||||
print("[Failsafe Mode : mmap is disabled.]")
|
print("[Failsafe Mode : mmap is disabled.]")
|
||||||
|
|
||||||
|
if selblaschoice==blasbatchopts[0]:
|
||||||
|
args.blasbatchsize = -1
|
||||||
|
if selblaschoice==blasbatchopts[1]:
|
||||||
|
args.blasbatchsize = 32
|
||||||
|
if selblaschoice==blasbatchopts[2]:
|
||||||
|
args.blasbatchsize = 64
|
||||||
|
if selblaschoice==blasbatchopts[3]:
|
||||||
|
args.blasbatchsize = 128
|
||||||
|
if selblaschoice==blasbatchopts[4]:
|
||||||
|
args.blasbatchsize = 256
|
||||||
|
if selblaschoice==blasbatchopts[5]:
|
||||||
|
args.blasbatchsize = 512
|
||||||
|
if selblaschoice==blasbatchopts[6]:
|
||||||
|
args.blasbatchsize = 1024
|
||||||
|
|
||||||
root = tk.Tk()
|
root = tk.Tk()
|
||||||
root.attributes("-alpha", 0)
|
root.attributes("-alpha", 0)
|
||||||
args.model_param = askopenfilename(title="Select ggml model .bin files")
|
args.model_param = askopenfilename(title="Select ggml model .bin files")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue