diff --git a/examples/common.cpp b/examples/common.cpp index 3fdee868d..55e977171 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -493,7 +493,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict); fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx); fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); - fprintf(stderr, " -gqa N, --gqa N grouped-query attention factor (TEMP!!! use 7 for LLaMAv2 70B) (default: %d)\n", params.n_gqa); + fprintf(stderr, " -gqa N, --gqa N grouped-query attention factor (TEMP!!! use 8 for LLaMAv2 70B) (default: %d)\n", params.n_gqa); fprintf(stderr, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); fprintf(stderr, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); fprintf(stderr, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);