From 9129e937f92172264ed99065a1ac2a97c1e3a1be Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 23 Apr 2023 15:57:06 +0800
Subject: [PATCH] only llama can use batch sizes above 256 to prevent
 unacceptably high memory usage

---
 gpttype_adapter.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 7e27390f9..036c44520 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -358,8 +358,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     int original_threads = params.n_threads;
     if (blasmode)
     {
-        //for gpttype, GPT2 crashes above 256.
-        int bbs = blasbatchsize; //(blasbatchsize>256?256:blasbatchsize);
+        //for non llama, limit to 256
+        int bbs = blasbatchsize;
+        if (file_format != FileFormat::GGML && file_format != FileFormat::GGHF && file_format != FileFormat::GGJT)
+        {
+            bbs = (blasbatchsize > 256 ? 256 : blasbatchsize);
+        }
+
         params.n_batch = bbs; //received reports of 1024 and above crashing on some models
         params.n_threads = 1;
     }
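
For reference, the new logic reduces to a pure clamping rule: llama-family file formats (GGML/GGHF/GGJT) keep the user-requested BLAS batch size, while every other architecture is capped at 256. Below is a minimal standalone sketch of that rule in C++; the trimmed-down FileFormat enum and the helper name clamp_blas_batch_size are hypothetical stand-ins (the real adapter uses koboldcpp's full FileFormat enum and writes the result directly into params.n_batch, as in the diff above).

#include <algorithm>

// Trimmed-down stand-in for koboldcpp's FileFormat enum; only the three
// llama formats matter for the clamp, OTHER is a placeholder for the rest.
enum class FileFormat { GGML, GGHF, GGJT, OTHER };

// Hypothetical helper restating the patch: llama formats keep the requested
// BLAS batch size, everything else is capped at 256, since larger batches
// were reported to crash or use unacceptably high memory on those models.
static int clamp_blas_batch_size(int blasbatchsize, FileFormat file_format)
{
    bool is_llama = (file_format == FileFormat::GGML ||
                     file_format == FileFormat::GGHF ||
                     file_format == FileFormat::GGJT);
    return is_llama ? blasbatchsize : std::min(blasbatchsize, 256);
}

// e.g. clamp_blas_batch_size(512, FileFormat::OTHER) == 256, while
//      clamp_blas_batch_size(512, FileFormat::GGJT)  == 512.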