Capture softmax operations for sampler profiling
This commit is contained in:
parent
feea528add
commit
bbb578b09d
1 changed file with 8 additions and 7 deletions
15
llama.cpp
15
llama.cpp
|
@ -8005,10 +8005,10 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_sample_softmax(ctx, candidates);
|
|
||||||
|
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
|
llama_sample_softmax(ctx, candidates);
|
||||||
|
|
||||||
// Compute the cumulative probabilities
|
// Compute the cumulative probabilities
|
||||||
float cum_sum = 0.0f;
|
float cum_sum = 0.0f;
|
||||||
size_t last_idx = candidates->size;
|
size_t last_idx = candidates->size;
|
||||||
|
@ -8037,10 +8037,10 @@ void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * can
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_sample_softmax(ctx, candidates);
|
|
||||||
|
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
|
llama_sample_softmax(ctx, candidates);
|
||||||
|
|
||||||
float scale = candidates->data[0].p; // scale by max prob
|
float scale = candidates->data[0].p; // scale by max prob
|
||||||
size_t i = 1; // first token always matches
|
size_t i = 1; // first token always matches
|
||||||
|
|
||||||
|
@ -8063,9 +8063,10 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_sample_softmax(nullptr, candidates);
|
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
|
llama_sample_softmax(nullptr, candidates);
|
||||||
|
|
||||||
// Compute the first and second derivatives
|
// Compute the first and second derivatives
|
||||||
std::vector<float> first_derivatives(candidates->size - 1);
|
std::vector<float> first_derivatives(candidates->size - 1);
|
||||||
std::vector<float> second_derivatives(candidates->size - 2);
|
std::vector<float> second_derivatives(candidates->size - 2);
|
||||||
|
@ -8124,11 +8125,11 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
// Compute the softmax of logits and calculate entropy
|
// Compute the softmax of logits and calculate entropy
|
||||||
llama_sample_softmax(nullptr, candidates);
|
llama_sample_softmax(nullptr, candidates);
|
||||||
|
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
|
||||||
|
|
||||||
float entropy = 0.0f;
|
float entropy = 0.0f;
|
||||||
for (size_t i = 0; i < candidates->size; ++i) {
|
for (size_t i = 0; i < candidates->size; ++i) {
|
||||||
entropy += -candidates->data[i].p * logf(candidates->data[i].p);
|
entropy += -candidates->data[i].p * logf(candidates->data[i].p);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue