From 6167c263c7c822c8f3512593d8e3a026ae0ee441 Mon Sep 17 00:00:00 2001 From: kalomaze <66376113+kalomaze@users.noreply.github.com> Date: Mon, 22 Jan 2024 03:02:40 -0600 Subject: [PATCH] Softmax exp & sum in one pass + temp returns if 1 --- llama.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/llama.cpp b/llama.cpp index 909ad4ad8..fb8e7b6a3 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7955,11 +7955,14 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c float max_l = candidates->data[0].logit; float cum_sum = 0.0f; + + // Calculate the exp and sum in one pass for (size_t i = 0; i < candidates->size; ++i) { - float p = expf(candidates->data[i].logit - max_l); - candidates->data[i].p = p; - cum_sum += p; + candidates->data[i].p = expf(candidates->data[i].logit - max_l); + cum_sum += candidates->data[i].p; } + + // Normalize the probabilities for (size_t i = 0; i < candidates->size; ++i) { candidates->data[i].p /= cum_sum; } @@ -8178,6 +8181,10 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c void llama_sample_temp(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) { const int64_t t_start_sample_us = ggml_time_us(); + if (temp == 1.0f) { + return; // No adjustment needed as dividing by 1 leaves the values unchanged + } + for (size_t i = 0; i < candidates_p->size; ++i) { candidates_p->data[i].logit /= temp; }