Reserve logits when causal attention is disabled on context

This commit is contained in:
Andrei Betlen 2024-06-11 21:12:43 -04:00
parent 7612e4cdcc
commit 9bed1aebbe

View file

@ -11957,7 +11957,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
const auto n_embd = hparams.n_embd; const auto n_embd = hparams.n_embd;
// TODO: use a per-batch flag for logits presence instead // TODO: use a per-batch flag for logits presence instead
const bool has_logits = cparams.causal_attn; const bool has_logits = hparams.causal_attn;
const bool has_embd = cparams.embeddings && (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE); const bool has_embd = cparams.embeddings && (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE);
const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0; const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;