sampling : deduplicated code for probability distribution access (#6240)

* sampling: remove duplicated code for probability distribution access

* free original_logits

* fix original_logits allocation

* fixes based on review @cebtenzzre

* change function name to `llama_sampling_prepare`
This commit is contained in:
Minsoo Cheong 2024-03-24 17:54:07 +09:00 committed by GitHub
parent ddf6568510
commit 586e7bc561
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 28 additions and 76 deletions

View file

@@ -219,7 +219,8 @@ int main(int argc, char ** argv) {
if (params.sparams.temp > 0) {
// stochastic verification
-llama_token_data_array dist_tgt = llama_sampling_probability_distribution(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
+llama_token_data_array dist_tgt = llama_sampling_prepare(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL);
+llama_sample_softmax(ctx_tgt, &dist_tgt);
float p_tgt = 0, p_dft = 0;
// GGML_ASSERT(dist_tgt.size() == dist_dft.size());