From 23fd782d35c3b7f5a3069785b7a2c89031b9d5f3 Mon Sep 17 00:00:00 2001
From: Gary Linscott
Date: Thu, 13 Apr 2023 08:20:54 -0700
Subject: [PATCH] Update batch size for efficiency

---
 examples/perplexity/perplexity.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index c36c597c9..38e3643b1 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -86,11 +86,13 @@ int main(int argc, char ** argv) {
     gpt_params params;
     params.model = "models/llama-7B/ggml-model.bin";
+    params.n_batch = 512;
 
     if (gpt_params_parse(argc, argv, params) == false) {
         return 1;
     }
 
     params.perplexity = true;
+    params.n_batch = std::min(params.n_batch, params.n_ctx);
 
     if (params.n_ctx > 2048) {
         fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
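
Note on the second hunk: the clamp ensures the effective batch size can never exceed the context size, since an evaluation batch larger than the context window would be meaningless. Below is a minimal standalone sketch of that behavior, not the actual llama.cpp code; `gpt_params_sketch` is a hypothetical stand-in for the real `gpt_params` struct, with only the two fields this patch touches.

```cpp
// Sketch of the clamp added in this patch: the effective batch size
// never exceeds the context size, whatever the user requests.
#include <algorithm>
#include <cstdio>

struct gpt_params_sketch {   // hypothetical stand-in for gpt_params
    int n_ctx   = 512;       // context size
    int n_batch = 512;       // new default batch size from this patch
};

int main() {
    gpt_params_sketch params;
    params.n_ctx = 256;      // e.g. the user requested a smaller context

    // the clamp introduced by the patch
    params.n_batch = std::min(params.n_batch, params.n_ctx);

    printf("effective n_batch = %d\n", params.n_batch); // prints 256
    return 0;
}
```

Setting the default to 512 before `gpt_params_parse` means a user-supplied `n_batch` still overrides it on the command line; the clamp then runs after parsing so it applies to both the default and any user value.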