llama : improve llama_batch API + simplify parallel example
This commit is contained in:
parent
a1327c71c6
commit
addae65fd4
6 changed files with 111 additions and 70 deletions
|
@@ -134,7 +134,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
while (true) {
|
||||
// sample from the target model
|
||||
- const llama_token id = llama_sample_token(ctx_tgt, NULL, grammar_tgt, params, last_tokens, candidates, i_dft);
|
||||
+ llama_token id = llama_sample_token(ctx_tgt, NULL, grammar_tgt, params, last_tokens, candidates, i_dft);
|
||||
|
||||
// remember which tokens were sampled - used for repetition penalties during sampling
|
||||
last_tokens.erase(last_tokens.begin());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue