llama : improve llama_batch API + simplify parallel example

This commit is contained in:
Georgi Gerganov 2023-09-20 10:46:18 +03:00
parent a1327c71c6
commit addae65fd4
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
6 changed files with 111 additions and 70 deletions

View file

@@ -419,7 +419,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
}
static std::vector<float> hellaswag_evaluate_tokens(
-    llama_context * ctx, const std::vector<int>& tokens, int n_past, int n_batch, int n_vocab, int n_thread
+    llama_context * ctx, std::vector<int> & tokens, int n_past, int n_batch, int n_vocab, int n_thread
) {
std::vector<float> result;
result.reserve(tokens.size() * n_vocab);