Fix hordeconfig max context setting, and add Makefile flags for cuda F16/KQuants per iter. (#252)
* Fix hordeconfig maxcontext setting.
* cuda: Bring DMMV_F16 and KQUANTS_ITER Makefile flags over from llama.
This commit is contained in:
parent
dfdd20240c
commit
b1f00fa9cc
2 changed files with 9 additions and 1 deletions
8
Makefile
8
Makefile
|
@ -149,6 +149,14 @@ ifdef LLAMA_CUDA_DMMV_Y
|
|||
else
|
||||
NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
|
||||
endif # LLAMA_CUDA_DMMV_Y
|
||||
ifdef LLAMA_CUDA_DMMV_F16
|
||||
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
|
||||
endif # LLAMA_CUDA_DMMV_F16
|
||||
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||
else
|
||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||
endif
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@
|
||||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
|
||||
|
|
|
@ -724,7 +724,7 @@ def main(args):
|
|||
sys.exit(2)
|
||||
|
||||
if args.hordeconfig and args.hordeconfig[0]!="":
|
||||
global friendlymodelname, maxhordelen, showdebug
|
||||
global friendlymodelname, maxhordelen, maxhordectx, showdebug
|
||||
friendlymodelname = "koboldcpp/"+args.hordeconfig[0]
|
||||
if len(args.hordeconfig) > 1:
|
||||
maxhordelen = int(args.hordeconfig[1])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue