From 711b0bcb11a6b102fd848c62109956a2b49adb3a Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Sun, 17 Mar 2024 20:41:21 -0400
Subject: [PATCH] llama : fix running a batch with n_outputs == 0

It previously worked because lctx.inp_out_ids was not initialized,
so it pointed to some garbage address which was somehow still valid
when I ran my tests.
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index b351779d9..6650cfc60 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8979,7 +8979,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos));
     }
 
-    if (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE) {
+    if (lctx.n_outputs > 0 && (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE)) {
         GGML_ASSERT(lctx.inp_out_ids && "every model that can must skip unused outputs");
 
         const int64_t n_tokens = batch.n_tokens;
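
Editor's note (not part of the patch): the change guards the write to
lctx.inp_out_ids so it only happens when the batch actually requests
outputs. The sketch below is a hedged, simplified illustration of that
pattern; the names (Context, set_inputs, out_ids) are hypothetical
stand-ins, not the real llama.cpp structures.

    // sketch.cpp -- simplified model of the n_outputs > 0 guard (hypothetical names)
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Context {
        // Stand-in for lctx.inp_out_ids; left empty when the batch requests
        // no outputs, mirroring an unallocated/uninitialized input tensor.
        std::vector<int32_t> out_ids;
        int32_t n_outputs = 0;
    };

    static void set_inputs(Context & ctx) {
        // Before the fix: the write happened unconditionally and only worked
        // by accident when out_ids pointed at still-valid garbage.
        // After the fix: skip the write entirely for output-less batches.
        if (ctx.n_outputs > 0) {
            assert(!ctx.out_ids.empty() && "output ids buffer must be allocated");
            for (int32_t i = 0; i < ctx.n_outputs; ++i) {
                ctx.out_ids[i] = i; // stand-in for copying the batch's output indices
            }
        }
    }

    int main() {
        Context ctx;     // n_outputs == 0 and out_ids never allocated
        set_inputs(ctx); // with the guard this is a no-op instead of a bad write
        return 0;
    }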