mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-10-11 20:38:07 +00:00
Make shell usability improvements to llama.cpp
- Introduce -v and --verbose flags
- Don't print stats / diagnostics unless -v is passed
- Reduce --top_p default from 0.95 to 0.70
- Change --reverse-prompt to no longer imply --interactive
- Permit --reverse-prompt specifying custom EOS if non-interactive
This commit is contained in:
parent
420f889ac3
commit
1c2da3a55a
6 changed files with 103 additions and 55 deletions
3
third_party/ggml/common.h
vendored
3
third_party/ggml/common.h
vendored
|
@ -17,6 +17,7 @@
|
|||
|
||||
struct gpt_params {
|
||||
int32_t seed = -1; // RNG seed
|
||||
int32_t verbose = 0; // Logging verbosity
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
int32_t n_predict = 128; // new tokens to predict
|
||||
int32_t repeat_last_n = 64; // last n tokens to penalize
|
||||
|
@ -27,7 +28,7 @@ struct gpt_params {
|
|||
|
||||
// sampling parameters
|
||||
int32_t top_k = 40;
|
||||
float top_p = 0.95f;
|
||||
float top_p = 0.70f;
|
||||
float temp = 0.80f;
|
||||
float repeat_penalty = 1.10f;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue