parallel : add option to load external prompt file (#3416)

* Enable external file and add datestamp

* Add name of external file at end

* Upload ToK2024

* Delete ToK2024.txt

* Experiments with jeopardy

* Move ParallelQuestions to /proimpts and rename

* Interim commit

* Interim commit

* Final revision

* Remove trailing whitespace

* remove cmake_all.sh

* Remove cmake_all.sh

* Changed .gitignore

* Improved reporting and new question files.

* Corrected typo

* More LLM questions

* Update LLM-questions.txt

* Yet more LLM-questions

* Remove jeopardy results file

* Reinstate original jeopardy.sh

* Update examples/parallel/parallel.cpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
pudepiedj 2023-10-06 14:16:38 +01:00 committed by GitHub
parent 97af49fa39
commit a8777ad84e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 151 additions and 11 deletions

View file

@ -8219,14 +8219,14 @@ void llama_print_timings(struct llama_context * ctx) {
const llama_timings timings = llama_get_timings(ctx);
LLAMA_LOG_INFO("\n");
LLAMA_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
LLAMA_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, timings.t_load_ms);
LLAMA_LOG_INFO("%s: sample time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
LLAMA_LOG_INFO("%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
LLAMA_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
LLAMA_LOG_INFO("%s: total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
}
void llama_reset_timings(struct llama_context * ctx) {