diff --git a/llama.cpp b/llama.cpp index 8b675ea99..d9f30a26d 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11957,7 +11957,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) { const auto n_embd = hparams.n_embd; // TODO: use a per-batch flag for logits presence instead - const bool has_logits = cparams.causal_attn; + const bool has_logits = hparams.causal_attn; const bool has_embd = cparams.embeddings && (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE); const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;