From 1b908d91434230602ee2239b5f7bc192510883e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stanis=C5=82aw=20Szymczyk?= Date: Sun, 11 Aug 2024 07:41:50 +0200 Subject: [PATCH] llama : check all graph nodes when searching for result_embd_pooled (needed for gemma-2) --- src/llama.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index e0fe8013b..aaf8db496 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -14722,12 +14722,15 @@ static int llama_decode_internal( res = nullptr; embd = nullptr; } else if (cparams.embeddings) { - res = nullptr; // do not extract logits for embedding case - embd = gf->nodes[gf->n_nodes - 1]; - if (strcmp(embd->name, "result_embd_pooled") != 0) { - embd = gf->nodes[gf->n_nodes - 2]; + res = nullptr; // do not extract logits for embedding case + embd = nullptr; + for (int i = gf->n_nodes - 1; i >= 0; --i) { + if (strcmp(gf->nodes[i]->name, "result_embd_pooled") == 0) { + embd = gf->nodes[i]; + break; + } } - GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0 && "missing embeddings tensor"); + GGML_ASSERT(embd != nullptr && "missing embeddings tensor"); } else { embd = nullptr; // do not extract embeddings when not needed GGML_ASSERT(strcmp(res->name, "result_output") == 0 && "missing result_output tensor");