add printout of pp_threads

Author: netrunnereve, 2023-08-06 00:13:02 -04:00
parent 1de711d4f8
commit 590feeac1d
3 changed files with 6 additions and 6 deletions


@@ -534,7 +534,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
     fprintf(stdout, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stdout, "  -ppt N, --pp-threads N\n");
-    fprintf(stdout, "                        number of threads to use during prompt processing (default is equal to --threads)\n");
+    fprintf(stdout, "                        number of threads to use during prompt processing (default: %d)\n", params.pp_threads);
     fprintf(stdout, "  -p PROMPT, --prompt PROMPT\n");
     fprintf(stdout, "                        prompt to start generation with (default: empty)\n");
     fprintf(stdout, "  -e                    process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");


@@ -133,8 +133,8 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+        fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
+                params.n_threads, std::thread::hardware_concurrency(), params.pp_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
     // determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
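With this change the startup banner reports both thread pools against the same std::thread::hardware_concurrency() reading. A standalone reproduction of just the new print, with llama_print_system_info() swapped for a placeholder string (the real call needs the full library linked in, and the values here are illustrative):

    #include <cstdio>
    #include <thread>

    int main() {
        int n_threads  = 8;   // generation threads
        int pp_threads = 16;  // prompt-processing threads, e.g. set higher via -ppt
        // Placeholder for llama_print_system_info(), which requires linking llama.cpp.
        const char * sys_info = "AVX = 1 | AVX2 = 1 | ...";
        fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
                n_threads,  (int) std::thread::hardware_concurrency(),
                pp_threads, (int) std::thread::hardware_concurrency(), sys_info);
        return 0;
    }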


@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
     }
 
     // evaluate prompt
-    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads);
+    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.pp_threads);
 
     last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
     n_past += n_prompt_tokens;
@@ -93,7 +93,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx);
             llama_free_model(model);
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
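The matching header change to llama_eval is not shown in this excerpt. Inferred from the call sites above, which pass params.pp_threads after params.n_threads, the updated declaration would plausibly look like this sketch:

    // Assumed shape of the updated API (not shown in this diff); inferred from
    // the call sites above, which append pp_threads after n_threads.
    LLAMA_API int llama_eval(
            struct llama_context * ctx,
               const llama_token * tokens,
                             int   n_tokens,
                             int   n_past,
                             int   n_threads,    // threads for single-token generation
                             int   pp_threads);  // threads for prompt processing

The apparent motivation for the split is that prompt processing evaluates many tokens per call and tends to scale across more threads, while the one-token-at-a-time generation loop often peaks at a lower thread count.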