From bb55b19c0478386e3ceb8937718af03331b1cd42 Mon Sep 17 00:00:00 2001 From: thxCode Date: Tue, 6 Aug 2024 17:03:58 +0800 Subject: [PATCH] refactor: let ubatch-size = batch-size if non-causal Signed-off-by: thxCode --- src/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llama.cpp b/src/llama.cpp index a7b1c9ebd..224390890 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -16524,7 +16524,7 @@ struct llama_context * llama_new_context_with_model( cparams.n_batch = GGML_KQ_MASK_PAD; } - cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch); + cparams.n_ubatch = hparams.causal_attn ? std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch) : cparams.n_batch; cparams.n_ctx_orig_yarn = params.yarn_orig_ctx != 0 ? params.yarn_orig_ctx : hparams.n_ctx_orig_yarn != 0 ? hparams.n_ctx_orig_yarn :