Fix hordeconfig max context setting, and add Makefile flags for cuda F16/KQuants per iter. (#252)
* Fix hordeconfig maxcontext setting.
* cuda: Bring DMMV_F16 and KQUANTS_ITER Makefile flags over from llama.
This commit is contained in:
parent
dfdd20240c
commit
b1f00fa9cc
2 changed files with 9 additions and 1 deletions
8
Makefile
8
Makefile
|
@ -149,6 +149,14 @@ ifdef LLAMA_CUDA_DMMV_Y
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
|
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
|
||||||
endif # LLAMA_CUDA_DMMV_Y
|
endif # LLAMA_CUDA_DMMV_Y
|
||||||
|
ifdef LLAMA_CUDA_DMMV_F16
|
||||||
|
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
|
||||||
|
endif # LLAMA_CUDA_DMMV_F16
|
||||||
|
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||||
|
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
|
else
|
||||||
|
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||||
|
|
|
@ -724,7 +724,7 @@ def main(args):
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
if args.hordeconfig and args.hordeconfig[0]!="":
|
if args.hordeconfig and args.hordeconfig[0]!="":
|
||||||
global friendlymodelname, maxhordelen, showdebug
|
global friendlymodelname, maxhordelen, maxhordectx, showdebug
|
||||||
friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
|
friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
|
||||||
if len(args.hordeconfig) > 1:
|
if len(args.hordeconfig) > 1:
|
||||||
maxhordelen = int(args.hordeconfig[1])
|
maxhordelen = int(args.hordeconfig[1])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue