diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 0480eee19..d5df68637 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -83,7 +83,7 @@ bool eval_float(void * model, float * input, int N){
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        if (llama_eval_embd(ctx, (input+i*n_emb), n_eval, n_past, params.n_threads, params.pp_threads)) {
+        if (llama_eval_embd(ctx, (input+i*n_emb), n_eval, n_past, params.n_threads, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return false;
         }
@@ -104,7 +104,7 @@ bool eval_tokens(void * model, std::vector<llama_token> tokens) {
         if (n_eval > params.n_batch) {
             n_eval = params.n_batch;
         }
-        if (llama_eval(ctx, &tokens[i], n_eval, n_past, params.n_threads, params.pp_threads)) {
+        if (llama_eval(ctx, &tokens[i], n_eval, n_past, params.n_threads, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return false;
         }
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index e931d7f39..6f732ce46 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -74,7 +74,7 @@ int main(int argc, char ** argv) {

     if (params.embedding){
         if (embd_inp.size() > 0) {
-            if (llama_eval(ctx, embd_inp.data(), embd_inp.size(), n_past, params.n_threads, params.pp_threads)) {
+            if (llama_eval(ctx, embd_inp.data(), embd_inp.size(), n_past, params.n_threads, params.n_threads)) {
                 fprintf(stderr, "%s : failed to eval\n", __func__);
                 return 1;
             }
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index f75b1ec41..7ed661ec5 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -66,7 +66,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
             tokens[batch_start] = llama_token_bos();
         }

-        if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads, params.pp_threads)) {
+        if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return;
         }
@@ -233,7 +233,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
         }

         // Evaluate the query
-        if (llama_eval(ctx, query_embd.data(), query_embd.size(), 0, params.n_threads, params.pp_threads)) {
+        if (llama_eval(ctx, query_embd.data(), query_embd.size(), 0, params.n_threads, params.n_threads)) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return;
         }
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 4821fad5d..c8586718e 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
     }

     // evaluate prompt
-    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.pp_threads);
+    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.n_threads);

     last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
     n_past += n_prompt_tokens;
@@ -93,7 +93,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);

         printf("%s", next_token_str);
-        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
+        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.n_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx);
             llama_free_model(model);
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);

         printf("%s", next_token_str);
-        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
+        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.n_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 853cd74cd..10b666c1c 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -350,7 +350,7 @@ struct llama_server_context {
                 n_eval = params.n_batch;
             }
-            if (llama_eval(ctx, &embd[n_past], n_eval, n_past, params.n_threads, params.pp_threads))
+            if (llama_eval(ctx, &embd[n_past], n_eval, n_past, params.n_threads, params.n_threads))
             {
                 LOG_ERROR("failed to eval", {
                     {"n_eval", n_eval},
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 5fc6074bc..f093da32a 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -123,7 +123,7 @@ int main(int argc, char ** argv)
     // Evaluate the tokens :
     //---------------------------------

-    if ( llama_eval( ctx , tokens_list.data() , int(tokens_list.size()) , llama_get_kv_cache_token_count( ctx ) , params.n_threads , params.pp_threads ) )
+    if ( llama_eval( ctx , tokens_list.data() , int(tokens_list.size()) , llama_get_kv_cache_token_count( ctx ) , params.n_threads , params.n_threads ) )
     {
         fprintf( stderr, "%s : failed to eval\n" , __func__ );
         return 1;
diff --git a/llama.cpp b/llama.cpp
index 630a4357a..97c5e0cb0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1782,7 +1782,7 @@ static bool llama_eval_internal(
                    int   n_tokens,
                    int   n_past,
                    int   n_threads,
-            int   pp_threads,
+                   int   pp_threads,
             const char * cgraph_fname) {

     LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
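
Note: the hunks above target this branch's extended eval API, where `llama_eval` and `llama_eval_embd` take a prompt-processing thread count (`pp_threads`) after `n_threads`; upstream `llama_eval` at this point took a single thread count. As a minimal sketch of a caller against that extended signature, mirroring the chunk-by-`n_batch` loop used in `eval_tokens()` and `llama_server_context` above (the helper name `eval_prompt` is hypothetical, not part of the patch):

// Minimal sketch, assuming the extended llama_eval(ctx, tokens, n_tokens,
// n_past, n_threads, pp_threads) signature shown in the call sites above.
#include "llama.h"

#include <cstdio>
#include <vector>

// Hypothetical helper: evaluate a tokenized prompt in chunks of n_batch,
// advancing n_past as each chunk is consumed.
static bool eval_prompt(llama_context * ctx, const std::vector<llama_token> & tokens,
                        int & n_past, int n_batch, int n_threads, int pp_threads) {
    for (int i = 0; i < (int) tokens.size(); i += n_batch) {
        int n_eval = (int) tokens.size() - i;
        if (n_eval > n_batch) {
            n_eval = n_batch;
        }
        // llama_eval returns non-zero on failure
        if (llama_eval(ctx, tokens.data() + i, n_eval, n_past, n_threads, pp_threads)) {
            fprintf(stderr, "%s : failed to eval\n", __func__);
            return false;
        }
        n_past += n_eval;
    }
    return true;
}

With this patch, every call site passes `params.n_threads` for both arguments, so prompt processing and generation run with the same thread count; the `pp_threads` parameter remains in the signatures but is no longer driven by a separate `params.pp_threads` setting.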