mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-10-11 20:38:07 +00:00
Make shell usability improvements to llama.cpp
- Introduce -v and --verbose flags
- Don't print stats / diagnostics unless -v is passed
- Reduce --top_p default from 0.95 to 0.70
- Change --reverse-prompt to no longer imply --interactive
- Permit --reverse-prompt specifying custom EOS if non-interactive
This commit is contained in:
parent
420f889ac3
commit
1c2da3a55a
6 changed files with 103 additions and 55 deletions
3
third_party/ggml/common.h
vendored
3
third_party/ggml/common.h
vendored
|
@ -17,6 +17,7 @@
|
|||
|
||||
struct gpt_params {
|
||||
int32_t seed = -1; // RNG seed
|
||||
int32_t verbose = 0; // Logging verbosity
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
int32_t n_predict = 128; // new tokens to predict
|
||||
int32_t repeat_last_n = 64; // last n tokens to penalize
|
||||
|
@ -27,7 +28,7 @@ struct gpt_params {
|
|||
|
||||
// sampling parameters
|
||||
int32_t top_k = 40;
|
||||
float top_p = 0.95f;
|
||||
float top_p = 0.70f;
|
||||
float temp = 0.80f;
|
||||
float repeat_penalty = 1.10f;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue