Capture softmax operations for sampler profiling
This commit is contained in:
parent
feea528add
commit
bbb578b09d
1 changed file with 8 additions and 7 deletions
15
llama.cpp
15
llama.cpp
|
@@ -8005,10 +8005,10 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
|
|||
return;
|
||||
}
|
||||
|
||||
llama_sample_softmax(ctx, candidates);
|
||||
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
llama_sample_softmax(ctx, candidates);
|
||||
|
||||
// Compute the cumulative probabilities
|
||||
float cum_sum = 0.0f;
|
||||
size_t last_idx = candidates->size;
|
||||
|
@@ -8037,10 +8037,10 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * can
|
|||
return;
|
||||
}
|
||||
|
||||
llama_sample_softmax(ctx, candidates);
|
||||
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
llama_sample_softmax(ctx, candidates);
|
||||
|
||||
float scale = candidates->data[0].p; // scale by max prob
|
||||
size_t i = 1; // first token always matches
|
||||
|
||||
|
@@ -8063,9 +8063,10 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
|
|||
return;
|
||||
}
|
||||
|
||||
llama_sample_softmax(nullptr, candidates);
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
llama_sample_softmax(nullptr, candidates);
|
||||
|
||||
// Compute the first and second derivatives
|
||||
std::vector<float> first_derivatives(candidates->size - 1);
|
||||
std::vector<float> second_derivatives(candidates->size - 2);
|
||||
|
@@ -8124,11 +8125,11 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
|
|||
return;
|
||||
}
|
||||
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
// Compute the softmax of logits and calculate entropy
|
||||
llama_sample_softmax(nullptr, candidates);
|
||||
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
float entropy = 0.0f;
|
||||
for (size_t i = 0; i < candidates->size; ++i) {
|
||||
entropy += -candidates->data[i].p * logf(candidates->data[i].p);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue