diff --git a/.gitignore b/.gitignore
index a552139f1..c63e01fb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,8 @@
 build*/
 out/
 tmp/
 
+cmake-all.sh
+
 models/*
 models-mnt
diff --git a/ParallelQuestions.txt b/ParallelQuestions.txt
deleted file mode 100644
index e946f209b..000000000
--- a/ParallelQuestions.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-What do you know about Hobbits?
-What is quantum field theory?
-Why did the chicken cross the road?
-Who is the president of the United States?
-How do I run CMake on MacOS?
-Do you agree that C++ is a really finicky language compared with Python3?
-Is it a good idea to invest in technology?
-Do you like Wagner's Ring?
-Do you think this file input option is really neat?
-What should we all do about climate change?
-Is time-travel possible within the laws of current physics?
-Is it like anything to be a bat?
-Once the chicken has crossed the road, does it try to go back?
-Who is the greatest of all musical composers?
-What is art?
-Is there life elsewhere in the universe?
-What is intelligence?
-What is the difference between knowledge and intelligence?
-Will religion ever die?
-Do we understand ourselves?
-What is the best way to cook eggs?
-If you cannot see things, on what basis do you evaluate them?
-Explain the role of the np junction in photovoltaic cells?
-Is professional sport a good or bad influence on human behaviour?
-Is capital punishment immoral?
-Should we care about other people?
-Who are you?
-Which sense would you surrender if you could?
-Was Henry Ford a hero or a villain?
-Do we need leaders?
-What is nucleosynthesis?
-Who is the greatest scientist of all time so far?
\ No newline at end of file
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index a08847224..e891be7da 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -11,7 +11,6 @@
 #include 
 #include 
 #include 
-#include 
 
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
@@ -132,14 +131,14 @@
     } else {
         // Output each line of the input params.prompts vector and copy to k_prompts
         int index = 0;
-        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file);
+        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str());
 
         std::vector<std::string> prompts = split_string(params.prompt, '\n');
         for (const auto& prompt : prompts) {
             k_prompts.resize(index + 1);
             k_prompts[index] = prompt;
             index++;
-            printf("%3d prompt: %s\n", index, prompt);
+            printf("%3d prompt: %s\n", index, prompt.c_str());
         }
     }
 
@@ -272,7 +271,7 @@
                     client.n_decoded = 0;
                     client.i_batch = batch.n_tokens - 1;
 
-                    LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
+                    LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
 
                     g_seq_id += 1;
 
@@ -399,7 +398,7 @@
     print_date_time();
 
     LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
-    printf("external prompt file (if any): %s \n\n", params.prompt_file);
+    printf("external prompt file (if any): %s \n\n", params.prompt_file.c_str());
 
     LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
     LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);
diff --git a/llama.cpp b/llama.cpp
index 05b570bd1..1eb2be40f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7587,14 +7587,14 @@ void llama_print_timings(struct llama_context * ctx) {
     const llama_timings timings = llama_get_timings(ctx);
 
     LLAMA_LOG_INFO("\n");
-    LLAMA_LOG_INFO("%s:        load time = %8.2f ms\n", __func__, timings.t_load_ms);
-    LLAMA_LOG_INFO("%s:      sample time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        load time = %10.2f ms\n", __func__, timings.t_load_ms);
+    LLAMA_LOG_INFO("%s:      sample time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-    LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-    LLAMA_LOG_INFO("%s:        eval time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        eval time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-    LLAMA_LOG_INFO("%s:       total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+    LLAMA_LOG_INFO("%s:       total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 
 void llama_reset_timings(struct llama_context * ctx) {
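
The parallel.cpp hunks above are correctness fixes, not cosmetics: passing a std::string
directly to a printf-style "%s" conversion is undefined behavior, since the variadic call
expects a NUL-terminated const char *, so the patch appends .c_str() at each call site.
A minimal self-contained sketch of the bug class and its fix (hypothetical file name and
value, not code from the patch):

    #include <cstdio>
    #include <string>

    int main() {
        const std::string prompt_file = "prompts.txt";  // hypothetical value
        // printf("%s\n", prompt_file);                 // undefined behavior: %s expects const char *
        printf("%s\n", prompt_file.c_str());            // correct: pass the underlying C string
        return 0;
    }

The llama.cpp hunk, by contrast, is cosmetic: widening the timing fields from %8.2f to
%10.2f keeps the "ms" columns aligned once a stage takes longer than 99999.99 ms.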