From 2a5c27053ea8149f7a87d0b7b8626dbe841bd015 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Sat, 30 Sep 2023 16:35:28 +0100 Subject: [PATCH] Enable external file and add datestamp --- ParallelQuestions.txt | 32 ++++++++++++++++++++++++ cmake_all.sh | 6 +++++ common/common.cpp | 2 ++ common/common.h | 1 + examples/parallel/parallel.cpp | 45 +++++++++++++++++++++++++++++++--- 5 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 ParallelQuestions.txt create mode 100755 cmake_all.sh diff --git a/ParallelQuestions.txt b/ParallelQuestions.txt new file mode 100644 index 000000000..3f953faad --- /dev/null +++ b/ParallelQuestions.txt @@ -0,0 +1,32 @@ +What do you know about Hobbits? +What is quantum field theory? +Why did the chicken cross the road? +Who is the president of the United States? +How do I run CMake on MacOS? +Do you agree that C++ is a really finicky language compared with Python3? +Is it a good idea to invest in technology? +Do you like Wagner's Ring? +Do you think this file input option is really neat? +What should we all do about climate change? +Is time-travel possible within the laws of current physics? +Is it like anything to be a bat? +Once the chicken has crossed the road, does it try to go back? +Who is the greatest of all musical composers? +What is art? +Is there life elsewhere in the universe? +What is intelligence? +What is the difference between knowledge and intelligence? +Will religion ever die? +Do we understand ourselves? +What is the best way to cook eggs? +If you cannot see things, on what basis do you evaluate them? +What is belief? +Is professional sport a good or bad influence on human behaviour? +Is capital punishment immoral? +Should we care about other people? +Who am I? +Which sense would you surrender if you could? +Was Henry Ford a hero or a villain? +Do we need leaders? +What is nucleosynthesis? +Who is the greatest scientist of all time so far? 
\ No newline at end of file diff --git a/cmake_all.sh b/cmake_all.sh new file mode 100755 index 000000000..4b94a4eec --- /dev/null +++ b/cmake_all.sh @@ -0,0 +1,6 @@ +cd llama.cpp +rm -r build +cmake -B build +cd build +cmake --build . --config Release +cd .. \ No newline at end of file diff --git a/common/common.cpp b/common/common.cpp index ec181c6b3..e0bf65cb2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -167,6 +167,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } + // store the external file name in params + params.prompt_file = argv[i]; std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt)); if (params.prompt.back() == '\n') { params.prompt.pop_back(); diff --git a/common/common.h b/common/common.h index 0e2d3fa6c..cb40fd365 100644 --- a/common/common.h +++ b/common/common.h @@ -79,6 +79,7 @@ struct gpt_params { std::string model_draft = ""; // draft model for speculative decoding std::string model_alias = "unknown"; // model alias std::string prompt = ""; + std::string prompt_file = ""; // store for external prompt file name std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state std::string input_prefix = ""; // string to prefix user inputs with std::string input_suffix = ""; // string to suffix user inputs with diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 0434ded23..061fd3586 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -10,6 +10,8 @@ #include <cstdio> #include <string> #include <vector> +#include <ctime> +#include <iostream> // trim whitespace from the beginning and end of a string static std::string trim(const std::string & str) { @@ -70,6 +72,22 @@ struct client { std::vector<llama_token> tokens_prev; }; +static void printDateTime() { + std::time_t currentTime = std::time(nullptr); + std::cout << "\n\033[35mRUN PARAMETERS as at \033[0m" << std::ctime(&currentTime); +} + +// Define a split string 
function to ... +static std::vector<std::string> splitString(const std::string& input, char delimiter) { + std::vector<std::string> tokens; + std::istringstream stream(input); + std::string token; + while (std::getline(stream, token, delimiter)) { + tokens.push_back(token); + } + return tokens; +} + int main(int argc, char ** argv) { srand(1234); @@ -104,6 +122,23 @@ int main(int argc, char ** argv) { params.logits_all = true; std::tie(model, ctx) = llama_init_from_gpt_params(params); + // load the prompts from an external file if there are any + if (params.prompt.empty()) { + std::cout << "\n\033[32mNo new questions so proceed with build-in defaults.\033[0m"; + } else { + // Output each line of the input params.prompts vector and copy to k_prompts + int index = 0; + std::cout << "\n\033[32mNow printing the external prompt file " << params.prompt_file << "\033[0m\n\n"; + + std::vector<std::string> prompts = splitString(params.prompt, '\n'); + for (const auto& prompt : prompts) { + k_prompts.resize(index + 1); + k_prompts[index] = prompt; + index++; + std::cout << std::setw(3) << std::right << index << " prompt: " << prompt << std::endl; + } + } + fprintf(stderr, "\n\n"); fflush(stderr); @@ -336,8 +371,8 @@ int main(int argc, char ** argv) { const auto t_main_end = ggml_time_us(); - LOG_TEE("\033[1mClient %3d, seq %4d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \n\nInput: %s\nResponse: %s\n\n", - client.id, client.seq_id, client.n_prompt, client.n_decoded, + LOG_TEE("\033[31mClient %3d, seq %3d/%3d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \nInput: %s\n\033[35mResponse: %s\033[0m\n\n", + client.id, client.seq_id, n_seq, client.n_prompt, client.n_decoded, (t_main_end - client.t_start_prompt) / 1e6, (double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6, n_cache_miss, @@ -357,7 +392,11 @@ int main(int argc, char ** argv) { const auto t_main_end = ggml_time_us(); - LOG_TEE("\n\n"); + 
printDateTime(); + + LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system); + LOG_TEE("\n"); + LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6); LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6); LOG_TEE("Total speed (AVG): %6s speed: %5.2f t/s\n", "", (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6);