Enable external file and add datestamp

Author: pudepiedj 2023-09-30 16:35:28 +01:00
parent 40e07a60f9
commit 2a5c27053e
5 changed files with 83 additions and 3 deletions

ParallelQuestions.txt (new file, 32 lines)

@@ -0,0 +1,32 @@
What do you know about Hobbits?
What is quantum field theory?
Why did the chicken cross the road?
Who is the president of the United States?
How do I run CMake on MacOS?
Do you agree that C++ is a really finicky language compared with Python3?
Is it a good idea to invest in technology?
Do you like Wagner's Ring?
Do you think this file input option is really neat?
What should we all do about climate change?
Is time-travel possible within the laws of current physics?
Is it like anything to be a bat?
Once the chicken has crossed the road, does it try to go back?
Who is the greatest of all musical composers?
What is art?
Is there life elsewhere in the universe?
What is intelligence?
What is the difference between knowledge and intelligence?
Will religion ever die?
Do we understand ourselves?
What is the best way to cook eggs?
If you cannot see things, on what basis do you evaluate them?
What is belief?
Is professional sport a good or bad influence on human behaviour?
Is capital punishment immoral?
Should we care about other people?
Who am I?
Which sense would you surrender if you could?
Was Henry Ford a hero or a villain?
Do we need leaders?
What is nucleosynthesis?
Who is the greatest scientist of all time so far?
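
For context, one plausible way to exercise this file once the project is built (the model path and parallelism settings below are illustrative, not from the commit; -f is the prompt-file flag handled in common/common.cpp below):

    ./build/bin/parallel -m models/llama-2-7b.Q4_0.gguf -f ParallelQuestions.txt -np 8 -ns 32 -cb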

cmake_all.sh (new executable file, 6 lines)

@@ -0,0 +1,6 @@
#!/bin/sh
# rebuild llama.cpp from a clean CMake build tree
cd llama.cpp
rm -rf build   # -f: do not fail if build/ does not exist yet
cmake -B build
cd build
cmake --build . --config Release
cd ..

common/common.cpp (modified)

@@ -167,6 +167,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                invalid_param = true;
                break;
            }
            // store the external file name in params
            params.prompt_file = argv[i];
            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
            if (params.prompt.back() == '\n') {
                params.prompt.pop_back();
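
For orientation, a minimal sketch of the surrounding -f/--file handler in gpt_params_parse, reconstructed from the hunk above (the ifstream setup sits just before the lines shown; this is not verbatim from the commit):

    if (arg == "-f" || arg == "--file") {
        if (++i >= argc) {
            invalid_param = true;
            break;
        }
        std::ifstream file(argv[i]);
        if (!file) {
            invalid_param = true;
            break;
        }
        // store the external file name in params (the line this commit adds)
        params.prompt_file = argv[i];
        // append the whole file to params.prompt
        std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
        // drop a single trailing newline so the last prompt is not empty
        if (params.prompt.back() == '\n') {
            params.prompt.pop_back();
        }
    }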

common/common.h (modified)

@@ -79,6 +79,7 @@ struct gpt_params {
    std::string model_draft       = "";        // draft model for speculative decoding
    std::string model_alias       = "unknown"; // model alias
    std::string prompt            = "";
    std::string prompt_file      = "";         // store for external prompt file name
    std::string path_prompt_cache = "";        // path to file for saving/loading prompt eval state
    std::string input_prefix      = "";        // string to prefix user inputs with
    std::string input_suffix      = "";        // string to suffix user inputs with

examples/parallel/parallel.cpp (modified)

@@ -10,6 +10,8 @@
#include <cstdio>
#include <string>
#include <vector>
#include <ctime>
#include <iomanip>
#include <iostream> // needed for std::cout in printDateTime (assumed missing; not in the original hunk)
#include <sstream>  // needed for std::istringstream in splitString (assumed missing; not in the original hunk)

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
@@ -70,6 +72,22 @@ struct client {
    std::vector<llama_token> tokens_prev;
};

// print a magenta datestamp header (note: the string returned by std::ctime already ends with '\n')
static void printDateTime() {
    std::time_t currentTime = std::time(nullptr);
    std::cout << "\n\033[35mRUN PARAMETERS as at \033[0m" << std::ctime(&currentTime);
}

// split a delimited string into a vector of substrings (used below to turn the prompt file into one prompt per line)
static std::vector<std::string> splitString(const std::string& input, char delimiter) {
    std::vector<std::string> tokens;
    std::istringstream stream(input);
    std::string token;
    while (std::getline(stream, token, delimiter)) {
        tokens.push_back(token);
    }
    return tokens;
}
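
A quick illustration of splitString (a hypothetical standalone snippet, not part of the commit):

    // "What is art?\nWho am I?" splits into two entries; a trailing '\n' adds no empty entry
    std::vector<std::string> questions = splitString("What is art?\nWho am I?", '\n');
    // questions[0] == "What is art?", questions[1] == "Who am I?"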

int main(int argc, char ** argv) {
    srand(1234);
@@ -104,6 +122,23 @@ int main(int argc, char ** argv) {
    params.logits_all = true;
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    // load the prompts from an external file if there are any
    if (params.prompt.empty()) {
        std::cout << "\n\033[32mNo new questions so proceed with built-in defaults.\033[0m";
    } else {
        // output each line of the input params.prompt and copy to k_prompts
        int index = 0;
        std::cout << "\n\033[32mNow printing the external prompt file " << params.prompt_file << "\033[0m\n\n";
        std::vector<std::string> prompts = splitString(params.prompt, '\n');
        for (const auto& prompt : prompts) {
            k_prompts.resize(index + 1);
            k_prompts[index] = prompt;
            index++;
            std::cout << std::setw(3) << std::right << index << " prompt: " << prompt << std::endl;
        }
    }
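
As a stylistic aside, the resize-and-assign loop above is equivalent to clearing k_prompts and using push_back, which avoids hand-tracking the index (an alternative sketch, not what the commit does):

    k_prompts.clear(); // the first resize(1) above already truncates the built-in defaults
    for (const auto& prompt : prompts) {
        k_prompts.push_back(prompt);
        std::cout << std::setw(3) << std::right << k_prompts.size() << " prompt: " << prompt << std::endl;
    }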
    fprintf(stderr, "\n\n");
    fflush(stderr);
@@ -336,8 +371,8 @@ int main(int argc, char ** argv) {
            const auto t_main_end = ggml_time_us();
-           LOG_TEE("\033[1mClient %3d, seq %4d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \n\nInput: %s\nResponse: %s\n\n",
-               client.id, client.seq_id, client.n_prompt, client.n_decoded,
+           LOG_TEE("\033[31mClient %3d, seq %3d/%3d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \nInput: %s\n\033[35mResponse: %s\033[0m\n\n",
+               client.id, client.seq_id, n_seq, client.n_prompt, client.n_decoded,
            (t_main_end - client.t_start_prompt) / 1e6,
            (double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6,
            n_cache_miss,
@@ -357,7 +392,11 @@ int main(int argc, char ** argv) {
    const auto t_main_end = ggml_time_us();
    LOG_TEE("\n\n");
    printDateTime();
    LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
    LOG_TEE("\n");
    LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt              ) / (t_main_end - t_main_start) * 1e6);
    LOG_TEE("Total gen tokens:    %6d, speed: %5.2f t/s\n", n_total_gen,    (double) (n_total_gen                 ) / (t_main_end - t_main_start) * 1e6);
    LOG_TEE("Total speed (AVG):   %6s  speed: %5.2f t/s\n", "",             (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6);