build: rename main → llama-cli, server → llama-server, llava-cli → llama-llava-cli, etc... (#7809)
* `main`/`server`: rename to `llama` / `llama-server` for consistency w/ homebrew
* server: update refs -> llama-server
  gitignore llama-server
* server: simplify nix package
* main: update refs -> llama
  fix examples/main ref
* main/server: fix targets
* update more names
* Update build.yml
* rm accidentally checked in bins
* update straggling refs
* Update .gitignore
* Update server-llm.sh
* main: target name -> llama-cli
* Prefix all example bins w/ llama-
* fix main refs
* rename {main->llama}-cmake-pkg binary
* prefix more cmake targets w/ llama-
* add/fix gbnf-validator subfolder to cmake
* sort cmake example subdirs
* rm bin files
* fix llama-lookup-* Makefile rules
* gitignore /llama-*
* rename Dockerfiles
* rename llama|main -> llama-cli; consistent RPM bin prefixes
* fix some missing -cli suffixes
* rename dockerfile w/ llama-cli
* rename(make): llama-baby-llama
* update dockerfile refs
* more llama-cli(.exe)
* fix test-eval-callback
* rename: llama-cli-cmake-pkg(.exe)
* address gbnf-validator unused fread warning (switched to C++ / ifstream)
* add two missing llama- prefixes
* Updating docs for eval-callback binary to use new `llama-` prefix.
* Updating a few lingering doc references for rename of main to llama-cli
* Updating `run-with-preset.py` to use new binary names.
  Updating docs around `perplexity` binary rename.
* Updating documentation references for lookup-merge and export-lora
* Updating two small `main` references missed earlier in the finetune docs.
* Update apps.nix
* update grammar/README.md w/ new llama-* names
* update llama-rpc-server bin name + doc
* Revert "update llama-rpc-server bin name + doc"
  This reverts commit e474ef1df4.
* add hot topic notice to README.md
* Update README.md
* Update README.md
* rename gguf-split & quantize bins refs in **/tests.sh
---------

Co-authored-by: HanClinto <hanclinto@gmail.com>

Commit 1c641e6aac (parent 963552903f)
128 changed files with 578 additions and 578 deletions
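For reference, a rough sketch of how day-to-day invocations change after this rename; this is illustrative only, and the model path and prompt below are placeholders rather than anything taken from this commit:

# before this commit
make -j main server quantize
./main -m models/7B/ggml-model-q4_0.gguf -p "Hello"
./server -m models/7B/ggml-model-q4_0.gguf --port 8080

# after this commit (every example binary gets a llama- prefix)
make -j llama-cli llama-server llama-quantize
./llama-cli -m models/7B/ggml-model-q4_0.gguf -p "Hello"
./llama-server -m models/7B/ggml-model-q4_0.gguf --port 8080

The full old-to-new mapping is spelled out in the per-file diffs below.
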
@@ -4,7 +4,7 @@ wget https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/hellaswag

 echo "Usage:"
 echo ""
-echo " ./perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
+echo " ./llama-perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
 echo ""

 exit 0

@@ -4,7 +4,7 @@ wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.

 echo "Usage:"
 echo ""
-echo " ./perplexity -m model.gguf -f wiki.test.raw [other params]"
+echo " ./llama-perplexity -m model.gguf -f wiki.test.raw [other params]"
 echo ""

 exit 0

@@ -5,7 +5,7 @@ unzip wikitext-2-raw-v1.zip

 echo "Usage:"
 echo ""
-echo " ./perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]"
+echo " ./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]"
 echo ""

 exit 0

@@ -4,7 +4,7 @@ wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw

 echo "Usage:"
 echo ""
-echo " ./perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]"
+echo " ./llama-perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]"
 echo ""

 exit 0

@@ -3,9 +3,9 @@
 # Shortcut for downloading HF models
 #
 # Usage:
-# ./main -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-# ./main -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-# ./main -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
+# ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+# ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
+# ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
 #

 # all logs go to stderr

@@ -77,9 +77,9 @@ if [ "$1" -eq "1" ]; then

     python3 examples/convert-legacy-llama.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "2" ]; then

@@ -92,9 +92,9 @@ if [ "$1" -eq "2" ]; then

     python3 examples/convert-legacy-llama.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "3" ]; then

@@ -107,9 +107,9 @@ if [ "$1" -eq "3" ]; then

     python3 examples/convert-legacy-llama.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "4" ]; then

@@ -122,9 +122,9 @@ if [ "$1" -eq "4" ]; then

     python3 examples/convert-legacy-llama.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "5" ]; then

@@ -137,9 +137,9 @@ if [ "$1" -eq "5" ]; then

     python3 examples/convert-legacy-llama.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "6" ]; then

@@ -152,9 +152,9 @@ if [ "$1" -eq "6" ]; then

    python3 examples/convert-legacy-llama.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "7" ]; then

@@ -167,9 +167,9 @@ if [ "$1" -eq "7" ]; then

     python3 examples/convert-legacy-llama.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16

-    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
-    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
-    ./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q8_0.gguf q8_0
+    ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
+    ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
+    ./llama-quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q8_0.gguf q8_0
 fi

 if [ "$1" -eq "1" ]; then

@@ -181,22 +181,22 @@ if [ "$1" -eq "1" ]; then

     ../scripts/get-wikitext-2.sh
     unzip wikitext-2-raw-v1.zip

-    make -j && ./bin/perplexity -m ../models/tinyllama-1b/ggml-model-f16.gguf -f ./wikitext-2-raw/wiki.test.raw -ngl 100 --chunks 32
+    make -j && ./bin/llama-perplexity -m ../models/tinyllama-1b/ggml-model-f16.gguf -f ./wikitext-2-raw/wiki.test.raw -ngl 100 --chunks 32

     # batched
     cd /workspace/llama.cpp

-    LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+    LLAMA_CUDA=1 make -j && ./llama-batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999

     # batched-bench
     cd /workspace/llama.cpp

-    LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+    LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32

     # parallel
     cd /workspace/llama.cpp

-    LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+    LLAMA_CUDA=1 make -j && ./llama-parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb

 fi

@@ -204,10 +204,10 @@ fi
 #if [ "$1" -eq "7" ]; then
 # cd /workspace/llama.cpp
 #
-# LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+# LLAMA_CUDA=1 make -j && ./llama-speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
 #fi

 # more benches
-#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./llama-batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1

@@ -26,5 +26,5 @@ set -e
 mkdir -p ${out}

 for q in ${qnt[@]}; do
-    time ./bin/quantize ../models/${model}/ggml-model-f16.gguf ../models/${model}/ggml-model-${q}.gguf ${q} 2>&1 ${args} | tee ${out}/qnt-${q}.txt
+    time ./bin/llama-quantize ../models/${model}/ggml-model-f16.gguf ../models/${model}/ggml-model-${q}.gguf ${q} 2>&1 ${args} | tee ${out}/qnt-${q}.txt
 done

@@ -26,5 +26,5 @@ out="../tmp/results-${model}"
 mkdir -p ${out}

 for q in ${qnt[@]}; do
-    time ./bin/perplexity -m ../models/${model}/ggml-model-f16.gguf -f ./wiki.test.raw ${args} 2>&1 | tee ${out}/ppl-${q}.txt
+    time ./bin/llama-perplexity -m ../models/${model}/ggml-model-f16.gguf -f ./wiki.test.raw ${args} 2>&1 | tee ${out}/ppl-${q}.txt
 done

@@ -10,7 +10,7 @@ import yaml

 logger = logging.getLogger("run-with-preset")

-CLI_ARGS_MAIN_PERPLEXITY = [
+CLI_ARGS_LLAMA_CLI_PERPLEXITY = [
     "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
     "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
     "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix",

@@ -29,7 +29,7 @@ CLI_ARGS_LLAMA_BENCH = [
     "n-prompt", "output", "repetitions", "tensor-split", "threads", "verbose"
 ]

-CLI_ARGS_SERVER = [
+CLI_ARGS_LLAMA_SERVER = [
     "alias", "batch-size", "ctx-size", "embedding", "host", "memory-f32", "lora", "lora-base",
     "low-vram", "main-gpu", "mlock", "model", "n-gpu-layers", "n-probs", "no-mmap", "no-mul-mat-q",
     "numa", "path", "port", "rope-freq-base", "timeout", "rope-freq-scale", "tensor-split",

@@ -37,7 +37,7 @@ CLI_ARGS_SERVER = [
 ]

 description = """Run llama.cpp binaries with presets from YAML file(s).
-To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).
+To specify which binary should be run, specify the "binary" property (llama-cli, llama-perplexity, llama-bench, and llama-server are supported).
 To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.

 Formatting considerations:

@@ -77,19 +77,19 @@ for yaml_file in known_args.yaml_files:

 props = {prop.replace("_", "-"): val for prop, val in props.items()}

-binary = props.pop("binary", "main")
+binary = props.pop("binary", "llama-cli")
 if known_args.binary:
     binary = known_args.binary

 if os.path.exists(f"./{binary}"):
     binary = f"./{binary}"

-if binary.lower().endswith("main") or binary.lower().endswith("perplexity"):
-    cli_args = CLI_ARGS_MAIN_PERPLEXITY
+if binary.lower().endswith("llama-cli") or binary.lower().endswith("llama-perplexity"):
+    cli_args = CLI_ARGS_LLAMA_CLI_PERPLEXITY
 elif binary.lower().endswith("llama-bench"):
     cli_args = CLI_ARGS_LLAMA_BENCH
-elif binary.lower().endswith("server"):
-    cli_args = CLI_ARGS_SERVER
+elif binary.lower().endswith("llama-server"):
+    cli_args = CLI_ARGS_LLAMA_SERVER
 else:
     logger.error(f"Unknown binary: {binary}")
     sys.exit(1)

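To illustrate what the run-with-preset.py change above means in practice, here is a hedged sketch of driving the renamed server binary through a preset; the preset file name and its values are hypothetical, while the "binary" property and the argument names (model, ctx-size, port) come from the lists in this diff:

# write a hypothetical preset file (values are placeholders)
cat > server-preset.yml <<'EOF'
binary: llama-server
model: models/7B/ggml-model-q4_0.gguf
ctx-size: 4096
port: 8080
EOF

# run it through the preset script (adjust the path to wherever the script lives)
python3 run-with-preset.py server-preset.yml
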
@@ -380,13 +380,13 @@ fi

 if [[ "$backend" == "cuda" ]]; then
     printf "[+] Building with CUDA backend\n"
-    LLAMA_CUDA=1 make -j server $log
+    LLAMA_CUDA=1 make -j llama-server $log
 elif [[ "$backend" == "cpu" ]]; then
     printf "[+] Building with CPU backend\n"
-    make -j server $log
+    make -j llama-server $log
 elif [[ "$backend" == "metal" ]]; then
     printf "[+] Building with Metal backend\n"
-    make -j server $log
+    make -j llama-server $log
 else
     printf "[-] Unknown backend: %s\n" "$backend"
     exit 1

@@ -413,6 +413,6 @@ if [[ $verbose -eq 1 ]]; then
     args="$args --verbose"
 fi

-./server -m "../$wfile" --host 0.0.0.0 --port "$port" -c $n_kv -np "$n_parallel" $args
+./llama-server -m "../$wfile" --host 0.0.0.0 --port "$port" -c $n_kv -np "$n_parallel" $args

 exit 0