Interim commit

pudepiedj 2023-10-03 10:10:00 +01:00
parent 51196a44dc
commit 2e3dad3a9c
4 changed files with 11 additions and 42 deletions

.gitignore

@@ -28,6 +28,8 @@ build*/
 out/
 tmp/
 cmake-all.sh
+models/*
+models-mnt


@@ -1,32 +0,0 @@
-What do you know about Hobbits?
-What is quantum field theory?
-Why did the chicken cross the road?
-Who is the president of the United States?
-How do I run CMake on MacOS?
-Do you agree that C++ is a really finicky language compared with Python3?
-Is it a good idea to invest in technology?
-Do you like Wagner's Ring?
-Do you think this file input option is really neat?
-What should we all do about climate change?
-Is time-travel possible within the laws of current physics?
-Is it like anything to be a bat?
-Once the chicken has crossed the road, does it try to go back?
-Who is the greatest of all musical composers?
-What is art?
-Is there life elsewhere in the universe?
-What is intelligence?
-What is the difference between knowledge and intelligence?
-Will religion ever die?
-Do we understand ourselves?
-What is the best way to cook eggs?
-If you cannot see things, on what basis do you evaluate them?
-Explain the role of the np junction in photovoltaic cells?
-Is professional sport a good or bad influence on human behaviour?
-Is capital punishment immoral?
-Should we care about other people?
-Who are you?
-Which sense would you surrender if you could?
-Was Henry Ford a hero or a villain?
-Do we need leaders?
-What is nucleosynthesis?
-Who is the greatest scientist of all time so far?


@@ -11,7 +11,6 @@
 #include <string>
 #include <vector>
 #include <ctime>
-#include <iomanip>
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
@@ -132,14 +131,14 @@ int main(int argc, char ** argv) {
 } else {
     // Output each line of the input params.prompts vector and copy to k_prompts
     int index = 0;
-    printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file);
+    printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str());
     std::vector<std::string> prompts = split_string(params.prompt, '\n');
     for (const auto& prompt : prompts) {
         k_prompts.resize(index + 1);
         k_prompts[index] = prompt;
         index++;
-        printf("%3d prompt: %s\n", index, prompt);
+        printf("%3d prompt: %s\n", index, prompt.c_str());
     }
 }
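Both `printf` fixes in this hunk, and the matching one in the summary hunk further down, correct the same bug: `params.prompt_file` and `prompt` are `std::string` objects, while `%s` requires a NUL-terminated `const char *`, so passing the object straight through the varargs interface is undefined behaviour. A minimal sketch of the failure mode and the fix, using a hypothetical file name:

```cpp
#include <cstdio>
#include <string>

int main() {
    std::string prompt_file = "prompts.txt"; // hypothetical name, for illustration only

    // printf("%s\n", prompt_file);       // undefined behaviour: %s expects const char *
    printf("%s\n", prompt_file.c_str());  // correct: c_str() returns a NUL-terminated C string

    return 0;
}
```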
@@ -272,7 +271,7 @@ int main(int argc, char ** argv) {
 client.n_decoded = 0;
 client.i_batch = batch.n_tokens - 1;
-LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
+LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
 g_seq_id += 1;
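The `LOG_TEE` change in this hunk only swaps the ANSI SGR parameter in the escape sequence: `\033[1m` enables bold, `\033[31m` selects a red foreground, and `\033[0m` resets all attributes. A standalone sketch of the three codes this commit relies on:

```cpp
#include <cstdio>

int main() {
    printf("\033[1mbold\033[0m\n");   // SGR 1: bold (the old client banner)
    printf("\033[31mred\033[0m\n");   // SGR 31: red foreground (the new client banner)
    printf("\033[32mgreen\033[0m\n"); // SGR 32: green foreground (the prompt-file banner)
    return 0;
}
```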
@@ -399,7 +398,7 @@ int main(int argc, char ** argv) {
 print_date_time();
 LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
-printf("external prompt file (if any): %s \n\n", params.prompt_file);
+printf("external prompt file (if any): %s \n\n", params.prompt_file.c_str());
 LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
 LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);


@@ -7587,14 +7587,14 @@ void llama_print_timings(struct llama_context * ctx) {
 const llama_timings timings = llama_get_timings(ctx);
 LLAMA_LOG_INFO("\n");
-LLAMA_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
-LLAMA_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, timings.t_load_ms);
+LLAMA_LOG_INFO("%s: sample time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
     __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
     __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-LLAMA_LOG_INFO("%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
     __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-LLAMA_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+LLAMA_LOG_INFO("%s: total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 void llama_reset_timings(struct llama_context * ctx) {
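The change from `%8.2f` to `%10.2f` widens the millisecond fields, presumably so the timing table stays aligned on long runs: with two decimals, an 8-wide field holds values only up to 99999.99 ms (about 100 seconds) before it overflows and pushes the columns apart, whereas a 10-wide field holds up to 9999999.99 ms. A small sketch of the field-width behaviour:

```cpp
#include <cstdio>

int main() {
    printf("[%8.2f]\n",  12345.67);   // [12345.67]   exactly fills the 8-wide field
    printf("[%8.2f]\n",  123456.78);  // [123456.78]  9 characters: the field overflows
    printf("[%10.2f]\n", 123456.78);  // [ 123456.78] right-aligned within 10 columns
    return 0;
}
```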