add printout of pp_threads

Author: netrunnereve, 2023-08-06 00:13:02 -04:00
parent 1de711d4f8
commit 590feeac1d
3 changed files with 6 additions and 6 deletions


@@ -534,7 +534,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
     fprintf(stdout, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stdout, "  -ppt N, --pp-threads N\n");
-    fprintf(stdout, "                        number of threads to use during prompt processing (default is equal to --threads)\n");
+    fprintf(stdout, "                        number of threads to use during prompt processing (default: %d)\n", params.pp_threads);
     fprintf(stdout, "  -p PROMPT, --prompt PROMPT\n");
     fprintf(stdout, "                        prompt to start generation with (default: empty)\n");
     fprintf(stdout, "  -e                    process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");


@@ -133,8 +133,8 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+        fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
+                params.n_threads, std::thread::hardware_concurrency(), params.pp_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
     // determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
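With this change the startup banner reports both thread pools against the same std::thread::hardware_concurrency() reading. A standalone reproduction of just the new print, with llama_print_system_info() swapped for a placeholder string (the real call needs the full library linked in, and the values here are illustrative):

    #include <cstdio>
    #include <thread>

    int main() {
        int n_threads  = 8;   // generation threads
        int pp_threads = 16;  // prompt-processing threads, e.g. set higher via -ppt
        // Placeholder for llama_print_system_info(), which requires linking llama.cpp.
        const char * sys_info = "AVX = 1 | AVX2 = 1 | ...";
        fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
                n_threads,  (int) std::thread::hardware_concurrency(),
                pp_threads, (int) std::thread::hardware_concurrency(), sys_info);
        return 0;
    }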


@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
     }
 
     // evaluate prompt
-    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads);
+    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.pp_threads);
 
     last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
     n_past += n_prompt_tokens;
@@ -93,7 +93,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx);
             llama_free_model(model);
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
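The matching header change to llama_eval is not shown in this excerpt. Inferred from the call sites above, which pass params.pp_threads after params.n_threads, the updated declaration would plausibly look like this sketch:

    // Assumed shape of the updated API (not shown in this diff); inferred from
    // the call sites above, which append pp_threads after n_threads.
    LLAMA_API int llama_eval(
            struct llama_context * ctx,
               const llama_token * tokens,
                             int   n_tokens,
                             int   n_past,
                             int   n_threads,    // threads for single-token generation
                             int   pp_threads);  // threads for prompt processing

The apparent motivation for the split is that prompt processing evaluates many tokens per call and tends to scale across more threads, while the one-token-at-a-time generation loop often peaks at a lower thread count.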