From 9bed1aebbe14e0bd7b89baaa6fd796be23a26f1f Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 11 Jun 2024 21:12:43 -0400
Subject: [PATCH] Reserve logits when causal attention is disabled on context

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 8b675ea99..d9f30a26d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11957,7 +11957,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
     const auto n_embd = hparams.n_embd;
 
     // TODO: use a per-batch flag for logits presence instead
-    const bool has_logits = cparams.causal_attn;
+    const bool has_logits = hparams.causal_attn;
     const bool has_embd = cparams.embeddings && (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE);
 
     const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;