Interim commit
parent 51196a44dc
commit 2e3dad3a9c
4 changed files with 11 additions and 42 deletions
.gitignore (vendored): 2 changes
@@ -28,6 +28,8 @@ build*/
 out/
 tmp/
 
+cmake-all.sh
+
 
 models/*
 models-mnt
@ -1,32 +0,0 @@
|
||||||
What do you know about Hobbits?
|
|
||||||
What is quantum field theory?
|
|
||||||
Why did the chicken cross the road?
|
|
||||||
Who is the president of the United States?
|
|
||||||
How do I run CMake on MacOS?
|
|
||||||
Do you agree that C++ is a really finicky language compared with Python3?
|
|
||||||
Is it a good idea to invest in technology?
|
|
||||||
Do you like Wagner's Ring?
|
|
||||||
Do you think this file input option is really neat?
|
|
||||||
What should we all do about climate change?
|
|
||||||
Is time-travel possible within the laws of current physics?
|
|
||||||
Is it like anything to be a bat?
|
|
||||||
Once the chicken has crossed the road, does it try to go back?
|
|
||||||
Who is the greatest of all musical composers?
|
|
||||||
What is art?
|
|
||||||
Is there life elsewhere in the universe?
|
|
||||||
What is intelligence?
|
|
||||||
What is the difference between knowledge and intelligence?
|
|
||||||
Will religion ever die?
|
|
||||||
Do we understand ourselves?
|
|
||||||
What is the best way to cook eggs?
|
|
||||||
If you cannot see things, on what basis do you evaluate them?
|
|
||||||
Explain the role of the np junction in photovoltaic cells?
|
|
||||||
Is professional sport a good or bad influence on human behaviour?
|
|
||||||
Is capital punishment immoral?
|
|
||||||
Should we care about other people?
|
|
||||||
Who are you?
|
|
||||||
Which sense would you surrender if you could?
|
|
||||||
Was Henry Ford a hero or a villain?
|
|
||||||
Do we need leaders?
|
|
||||||
What is nucleosynthesis?
|
|
||||||
Who is the greatest scientist of all time so far?
|
|
|

@@ -11,7 +11,6 @@
 #include <string>
 #include <vector>
 #include <ctime>
-#include <iomanip>
 
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
@@ -132,14 +131,14 @@ int main(int argc, char ** argv) {
     } else {
         // Output each line of the input params.prompts vector and copy to k_prompts
         int index = 0;
-        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file);
+        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str());
 
         std::vector<std::string> prompts = split_string(params.prompt, '\n');
         for (const auto& prompt : prompts) {
             k_prompts.resize(index + 1);
             k_prompts[index] = prompt;
             index++;
-            printf("%3d prompt: %s\n", index, prompt);
+            printf("%3d prompt: %s\n", index, prompt.c_str());
         }
     }
 
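Note on the .c_str() changes in this hunk: printf is a C variadic function, so passing a std::string object where a %s conversion expects a const char * is undefined behavior; .c_str() supplies the NUL-terminated buffer instead. A minimal stand-alone sketch of the distinction (identifiers and values are illustrative only, not taken from this repository):

#include <cstdio>
#include <string>

int main() {
    std::string prompt_file = "prompts.txt";    // illustrative value

    // printf("file: %s\n", prompt_file);       // undefined behavior: %s needs a C string
    printf("file: %s\n", prompt_file.c_str());  // correct: pass the NUL-terminated buffer
    return 0;
}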
@@ -272,7 +271,7 @@ int main(int argc, char ** argv) {
         client.n_decoded = 0;
         client.i_batch = batch.n_tokens - 1;
 
-        LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
+        LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
 
         g_seq_id += 1;
 
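The only difference in this hunk is the ANSI escape sequence at the start of the message: \033[1m selects bold text, \033[31m selects a red foreground, and \033[0m resets attributes. A tiny stand-alone illustration, assuming an ANSI-capable terminal:

#include <cstdio>

int main() {
    printf("\033[1mbold client banner\033[0m\n");   // bold text
    printf("\033[31mred client banner\033[0m\n");   // red foreground
    return 0;
}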
@@ -399,7 +398,7 @@ int main(int argc, char ** argv) {
     print_date_time();
 
     LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
-    printf("external prompt file (if any): %s \n\n", params.prompt_file);
+    printf("external prompt file (if any): %s \n\n", params.prompt_file.c_str());
 
     LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
     LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);

llama.cpp: 10 changes
@@ -7587,14 +7587,14 @@ void llama_print_timings(struct llama_context * ctx) {
     const llama_timings timings = llama_get_timings(ctx);
 
     LLAMA_LOG_INFO("\n");
-    LLAMA_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
-    LLAMA_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, timings.t_load_ms);
+    LLAMA_LOG_INFO("%s: sample time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-    LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-    LLAMA_LOG_INFO("%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-    LLAMA_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+    LLAMA_LOG_INFO("%s: total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 
 void llama_reset_timings(struct llama_context * ctx) {
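These changes only widen the millisecond field from %8.2f to %10.2f, so timings with more digits before the decimal point still line up in the report. A small stand-alone sketch of the effect (the value is made up):

#include <cstdio>

int main() {
    double t_ms = 1234567.89;                   // hypothetical total time in ms
    printf("total time = %8.2f ms\n", t_ms);    // value overflows an 8-character field
    printf("total time = %10.2f ms\n", t_ms);   // fits within a 10-character field
    return 0;
}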