sampling : deduplicated code for probability distribution access (#6240)
* sampling: remove duplicated code for probability distribution access
* free original_logits
* fix original_logits allocation
* fixes based on review @cebtenzzre
* change function name to `llama_sampling_prepare`
This commit is contained in:
parent
ddf6568510
commit
586e7bc561
4 changed files with 28 additions and 76 deletions
|
@@ -219,7 +219,8 @@ int main(int argc, char ** argv) {
|
|||
if (params.sparams.temp > 0) {
|
||||
// stochastic verification
|
||||
|
||||
- llama_token_data_array dist_tgt = llama_sampling_probability_distribution(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
+ llama_token_data_array dist_tgt = llama_sampling_prepare(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL);
|
||||
+ llama_sample_softmax(ctx_tgt, &dist_tgt);
|
||||
float p_tgt = 0, p_dft = 0;
|
||||
|
||||
// GGML_ASSERT(dist_tgt.size() == dist_dft.size());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue