diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh index 7ca6fa7f2..3bc0fa471 100755 --- a/examples/gguf-split/tests.sh +++ b/examples/gguf-split/tests.sh @@ -41,7 +41,7 @@ echo PASS echo # 2b. Test the sharded model is loading properly -$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32 echo PASS echo @@ -51,7 +51,7 @@ echo PASS echo # 3b. Test the merged model is loading properly -$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32 echo PASS echo @@ -61,7 +61,7 @@ echo PASS echo # 4b. Test the sharded model is loading properly -$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32 echo PASS echo @@ -71,7 +71,7 @@ echo #echo # 5b. Test the merged model is loading properly -#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32 +#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --n-predict 32 #echo PASS #echo @@ -81,7 +81,7 @@ echo PASS echo # 6b. Test the sharded model is loading properly -$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32 echo PASS echo diff --git a/examples/main/README.md b/examples/main/README.md index ee930f4e7..4eaa68475 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -53,13 +53,13 @@ The following command generates "infinite" text from a starting prompt (you can #### Unix-based systems (Linux, macOS, etc.): ```bash -./main -m models/7B/ggml-model.bin --ignore-eos -n -1 --random-prompt +./main -m models/7B/ggml-model.bin --ignore-eos -n -1 ``` #### Windows: ```powershell -main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt +main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 ``` ## Common Options @@ -80,7 +80,6 @@ The `main` program provides several ways to interact with the LLaMA models using - `--prompt PROMPT`: Provide a prompt directly as a command-line option. - `--file FNAME`: Provide a file containing a prompt or multiple prompts. - `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.) -- `--random-prompt`: Start with a randomized prompt. ## Interaction diff --git a/examples/quantize/tests.sh b/examples/quantize/tests.sh index a3ca74c68..38e28ffc3 100644 --- a/examples/quantize/tests.sh +++ b/examples/quantize/tests.sh @@ -47,7 +47,7 @@ echo PASS echo # 3a. Test the requanted model is loading properly -$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32 echo PASS echo @@ -57,7 +57,7 @@ echo PASS echo # 4b. Test the requanted model is loading properly -$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --random-prompt --n-predict 32 +$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32 echo PASS echo diff --git a/scripts/run-with-preset.py b/scripts/run-with-preset.py index e986a3604..452dd7105 100755 --- a/scripts/run-with-preset.py +++ b/scripts/run-with-preset.py @@ -18,7 +18,7 @@ CLI_ARGS_MAIN_PERPLEXITY = [ "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock", "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q", "np-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt", - "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n", + "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "repeat-last-n", "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed", "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical", "verbose-prompt"