sampling : avoid expensive softmax during greedy sampling (#9605)

* sampling : avoid expensive softmax during greedy sampling

ggml-ci

* speculative : fix default RNG seed + set sparams.n_probs

* Update tests/test-sampling.cpp

Co-authored-by: slaren <slarengh@gmail.com>

* sampling : add clarifying comment [no ci]

---------

Co-authored-by: slaren <slarengh@gmail.com>
This commit is contained in:
Georgi Gerganov 2024-09-24 09:03:17 +03:00 committed by GitHub
parent c087b6f11d
commit b0f27361f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 59 additions and 6 deletions

View file

@ -32,6 +32,9 @@ struct seq_draft {
int main(int argc, char ** argv) {
gpt_params params;
// needed to get candidate probs even for temp <= 0.0
params.sparams.n_probs = 128;
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_SPECULATIVE)) {
return 1;
}
@ -49,7 +52,7 @@ int main(int argc, char ** argv) {
// probability threshold for splitting a draft branch (only for n_seq_dft > 1)
const float p_split = params.p_split;
std::default_random_engine rng(params.sparams.seed);
std::default_random_engine rng(params.sparams.seed == LLAMA_DEFAULT_SEED ? std::random_device()() : params.sparams.seed);
std::uniform_real_distribution<> u_dist;
// init llama.cpp