diff --git a/examples/common.cpp b/examples/common.cpp
index 7502c87ea..c531639fb 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -534,7 +534,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
     fprintf(stdout, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads);
     fprintf(stdout, "  -ppt N, --pp-threads N\n");
-    fprintf(stdout, "                        number of threads to use during prompt processing (default is equal to --threads)\n");
+    fprintf(stdout, "                        number of threads to use during prompt processing (default: %d)\n", params.pp_threads);
     fprintf(stdout, "  -p PROMPT, --prompt PROMPT\n");
     fprintf(stdout, "                        prompt to start generation with (default: empty)\n");
     fprintf(stdout, "  -e                    process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index dba3bdab2..59fc8a295 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -133,8 +133,8 @@ int main(int argc, char ** argv) {
     // print system information
     {
         fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+        fprintf(stderr, "system_info: n_threads = %d / %d | pp_threads = %d / %d | %s\n",
+                params.n_threads, std::thread::hardware_concurrency(), params.pp_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
     // determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 61c71c358..4821fad5d 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
     }
 
     // evaluate prompt
-    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads);
+    llama_eval(ctx, tokens.data(), n_prompt_tokens, n_past, params.n_threads, params.pp_threads);
 
     last_n_tokens_data.insert(last_n_tokens_data.end(), tokens.data(), tokens.data() + n_prompt_tokens);
     n_past += n_prompt_tokens;
@@ -93,7 +93,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx);
             llama_free_model(model);
@@ -153,7 +153,7 @@ int main(int argc, char ** argv) {
         last_n_tokens_data.push_back(next_token);
 
         printf("%s", next_token_str);
-        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads)) {
+        if (llama_eval(ctx2, &next_token, 1, n_past, params.n_threads, params.pp_threads)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
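
For downstream callers updating to the new signature, here is a minimal sketch of how the two thread counts are passed through `llama_eval` after this patch. The helper name `evaluate_prompt_and_step` is purely illustrative, and the context setup, tokenization, and sampling are assumed to happen elsewhere; only the `llama_eval` calls mirror the call sites changed in this diff.

```cpp
// Hypothetical caller sketch (not part of this patch). It assumes `ctx` is an
// already-initialized llama_context and `prompt_tokens` holds a tokenized
// prompt; initialization and sampling are elided.
#include <cstdio>
#include <vector>

#include "llama.h"

static bool evaluate_prompt_and_step(llama_context * ctx,
                                     const std::vector<llama_token> & prompt_tokens,
                                     llama_token next_token,
                                     int n_threads, int pp_threads) {
    int n_past = 0;

    // Batch prompt evaluation: pp_threads is meant for this phase,
    // per the --pp-threads usage text added in common.cpp.
    if (llama_eval(ctx, prompt_tokens.data(), (int) prompt_tokens.size(),
                   n_past, n_threads, pp_threads)) {
        fprintf(stderr, "failed to evaluate prompt\n");
        return false;
    }
    n_past += (int) prompt_tokens.size();

    // Single-token generation step: both counts are still passed, mirroring
    // the updated save-load-state.cpp call sites above.
    if (llama_eval(ctx, &next_token, 1, n_past, n_threads, pp_threads)) {
        fprintf(stderr, "failed to evaluate\n");
        return false;
    }
    return true;
}
```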