diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh
index 7ca6fa7f2..3bc0fa471 100755
--- a/examples/gguf-split/tests.sh
+++ b/examples/gguf-split/tests.sh
@@ -41,7 +41,7 @@ echo PASS
 echo
 
 # 2b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
 echo PASS
 echo
 
@@ -51,7 +51,7 @@ echo PASS
 echo
 
 # 3b. Test the merged model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
 echo PASS
 echo
 
@@ -61,7 +61,7 @@ echo PASS
 echo
 
 # 4b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
 echo PASS
 echo
 
@@ -71,7 +71,7 @@ echo
 #echo
 
 # 5b. Test the merged model is loading properly
-#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32
+#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --n-predict 32
 #echo PASS
 #echo
 
@@ -81,7 +81,7 @@ echo PASS
 echo
 
 # 6b. Test the sharded model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
 echo PASS
 echo
 
diff --git a/examples/main/README.md b/examples/main/README.md
index ee930f4e7..4eaa68475 100644
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -53,13 +53,13 @@ The following command generates "infinite" text from a starting prompt (you can
 #### Unix-based systems (Linux, macOS, etc.):
 
 ```bash
-./main -m models/7B/ggml-model.bin --ignore-eos -n -1 --random-prompt
+./main -m models/7B/ggml-model.bin --ignore-eos -n -1
 ```
 
 #### Windows:
 
 ```powershell
-main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt
+main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1
 ```
 
 ## Common Options
@@ -80,7 +80,6 @@ The `main` program provides several ways to interact with the LLaMA models using
 -   `--prompt PROMPT`: Provide a prompt directly as a command-line option.
 -   `--file FNAME`: Provide a file containing a prompt or multiple prompts.
 -   `--interactive-first`: Run the program in interactive mode and wait for input right away. (More on this below.)
--   `--random-prompt`: Start with a randomized prompt.
 
 ## Interaction
 
diff --git a/examples/quantize/tests.sh b/examples/quantize/tests.sh
index a3ca74c68..38e28ffc3 100644
--- a/examples/quantize/tests.sh
+++ b/examples/quantize/tests.sh
@@ -47,7 +47,7 @@ echo PASS
 echo
 
 # 3a. Test the requanted model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32
 echo PASS
 echo
 
@@ -57,7 +57,7 @@ echo PASS
 echo
 
 # 4b. Test the requanted model is loading properly
-$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --random-prompt --n-predict 32
+$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32
 echo PASS
 echo
 
diff --git a/scripts/run-with-preset.py b/scripts/run-with-preset.py
index e986a3604..452dd7105 100755
--- a/scripts/run-with-preset.py
+++ b/scripts/run-with-preset.py
@@ -18,7 +18,7 @@ CLI_ARGS_MAIN_PERPLEXITY = [
     "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
     "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",
     "np-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",
-    "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",
+    "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "repeat-last-n",
     "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",
     "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",
     "verbose-prompt"