Add printout of pp_threads (prompt-processing thread count) to the usage text and the system_info line
This commit is contained in:
parent
1de711d4f8
commit
590feeac1d
3 changed files with 6 additions and 6 deletions
|
@ -534,7 +534,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||||
fprintf(stdout, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
|
fprintf(stdout, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
|
||||||
fprintf(stdout, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
|
fprintf(stdout, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
|
||||||
fprintf(stdout, " -ppt N, --pp-threads N\n");
|
fprintf(stdout, " -ppt N, --pp-threads N\n");
|
||||||
fprintf(stdout, " number of threads to use during prompt processing (default is equal to --threads)\n");
|
fprintf(stdout, " number of threads to use during prompt processing (default: %d)\n", params.pp_threads);
|
||||||
fprintf(stdout, " -p PROMPT, --prompt PROMPT\n");
|
fprintf(stdout, " -p PROMPT, --prompt PROMPT\n");
|
||||||
fprintf(stdout, " prompt to start generation with (default: empty)\n");
|
fprintf(stdout, " prompt to start generation with (default: empty)\n");
|
||||||
fprintf(stdout, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
|
fprintf(stdout, " -e process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
|
||||||
|
|
|
@ -133,8 +133,8 @@ int main(int argc, char ** argv) {
|
||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
|
||||||
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
params.n_threads, std::thread::hardware_concurrency(), params.pp_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
||||||
}
|
}
|
||||||
|
|
||||||
// determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
|
// determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
|
||||||
|
|
|
@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// evaluate prompt
|
// evaluate prompt
|
||||||
llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads);
|
llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.pp_threads);
|
||||||
|
|
||||||
last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
|
last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
|
||||||
n_past += n_prompt_tokens;
|
n_past += n_prompt_tokens;
|
||||||
|
@ -93,7 +93,7 @@ int main(int argc, char ** argv) {
|
||||||
last_n_tokens_data.push_back(next_token);
|
last_n_tokens_data.push_back(next_token);
|
||||||
|
|
||||||
printf("%s", next_token_str);
|
printf("%s", next_token_str);
|
||||||
if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) {
|
if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
|
||||||
fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
|
fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
|
@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
|
||||||
last_n_tokens_data.push_back(next_token);
|
last_n_tokens_data.push_back(next_token);
|
||||||
|
|
||||||
printf("%s", next_token_str);
|
printf("%s", next_token_str);
|
||||||
if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) {
|
if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
|
||||||
fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
|
fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
|
||||||
llama_free(ctx2);
|
llama_free(ctx2);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue