diff --git a/third_party/ggml/common.h b/third_party/ggml/common.h index 9964c92fa..6ec10350a 100644 --- a/third_party/ggml/common.h +++ b/third_party/ggml/common.h @@ -22,7 +22,7 @@ struct gpt_params { int32_t seed = -1; // RNG seed int32_t verbose = 0; // Logging verbosity int32_t n_threads = std::min(1, (int)(_getcpucount() * 0.75)); - int32_t n_predict = 128; // new tokens to predict + int32_t n_predict = -1; // new tokens to predict int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) int32_t n_ctx = 512; // context size int32_t n_batch = 64; // batch size for prompt processing (must be >=32 to use BLAS)