Softmax: compute exp and sum in one pass; temperature sampling returns early when temp is 1
This commit is contained in:
parent
726c0fa9a2
commit
6167c263c7
1 changed file with 10 additions and 3 deletions
13
llama.cpp
13
llama.cpp
|
@@ -7955,11 +7955,14 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
|
||||||
|
|
||||||
float max_l = candidates->data[0].logit;
|
float max_l = candidates->data[0].logit;
|
||||||
float cum_sum = 0.0f;
|
float cum_sum = 0.0f;
|
||||||
|
|
||||||
|
// Calculate the exp and sum in one pass
|
||||||
for (size_t i = 0; i < candidates->size; ++i) {
|
for (size_t i = 0; i < candidates->size; ++i) {
|
||||||
float p = expf(candidates->data[i].logit - max_l);
|
candidates->data[i].p = expf(candidates->data[i].logit - max_l);
|
||||||
candidates->data[i].p = p;
|
cum_sum += candidates->data[i].p;
|
||||||
cum_sum += p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalize the probabilities
|
||||||
for (size_t i = 0; i < candidates->size; ++i) {
|
for (size_t i = 0; i < candidates->size; ++i) {
|
||||||
candidates->data[i].p /= cum_sum;
|
candidates->data[i].p /= cum_sum;
|
||||||
}
|
}
|
||||||
|
@@ -8178,6 +8181,10 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
|
||||||
void llama_sample_temp(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) {
|
void llama_sample_temp(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) {
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
|
if (temp == 1.0f) {
|
||||||
|
return; // No adjustment needed as dividing by 1 leaves the values unchanged
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < candidates_p->size; ++i) {
|
for (size_t i = 0; i < candidates_p->size; ++i) {
|
||||||
candidates_p->data[i].logit /= temp;
|
candidates_p->data[i].logit /= temp;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue