diff --git a/third_party/ggml/common.h b/third_party/ggml/common.h
index 9964c92fa..6ec10350a 100644
--- a/third_party/ggml/common.h
+++ b/third_party/ggml/common.h
@@ -22,7 +22,7 @@ struct gpt_params {
     int32_t seed          = -1;   // RNG seed
     int32_t verbose       = 0;    // Logging verbosity
     int32_t n_threads     = std::min(1, (int)(_getcpucount() * 0.75));
-    int32_t n_predict     = 128;  // new tokens to predict
+    int32_t n_predict     = -1;   // new tokens to predict (-1 presumably = no fixed limit; TODO confirm against the code that reads n_predict)
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
     int32_t n_batch       = 64;   // batch size for prompt processing (must be >=32 to use BLAS)