Interim commit
parent 51196a44dc
commit 2e3dad3a9c
4 changed files with 11 additions and 42 deletions
.gitignore (vendored): 2 changes
@@ -28,6 +28,8 @@ build*/
 out/
 tmp/
 
+cmake-all.sh
+
 
 models/*
 models-mnt
@ -1,32 +0,0 @@
|
||||||
What do you know about Hobbits?
|
|
||||||
What is quantum field theory?
|
|
||||||
Why did the chicken cross the road?
|
|
||||||
Who is the president of the United States?
|
|
||||||
How do I run CMake on MacOS?
|
|
||||||
Do you agree that C++ is a really finicky language compared with Python3?
|
|
||||||
Is it a good idea to invest in technology?
|
|
||||||
Do you like Wagner's Ring?
|
|
||||||
Do you think this file input option is really neat?
|
|
||||||
What should we all do about climate change?
|
|
||||||
Is time-travel possible within the laws of current physics?
|
|
||||||
Is it like anything to be a bat?
|
|
||||||
Once the chicken has crossed the road, does it try to go back?
|
|
||||||
Who is the greatest of all musical composers?
|
|
||||||
What is art?
|
|
||||||
Is there life elsewhere in the universe?
|
|
||||||
What is intelligence?
|
|
||||||
What is the difference between knowledge and intelligence?
|
|
||||||
Will religion ever die?
|
|
||||||
Do we understand ourselves?
|
|
||||||
What is the best way to cook eggs?
|
|
||||||
If you cannot see things, on what basis do you evaluate them?
|
|
||||||
Explain the role of the np junction in photovoltaic cells?
|
|
||||||
Is professional sport a good or bad influence on human behaviour?
|
|
||||||
Is capital punishment immoral?
|
|
||||||
Should we care about other people?
|
|
||||||
Who are you?
|
|
||||||
Which sense would you surrender if you could?
|
|
||||||
Was Henry Ford a hero or a villain?
|
|
||||||
Do we need leaders?
|
|
||||||
What is nucleosynthesis?
|
|
||||||
Who is the greatest scientist of all time so far?
|
|
|

@@ -11,7 +11,6 @@
 #include <string>
 #include <vector>
 #include <ctime>
-#include <iomanip>
 
 // trim whitespace from the beginning and end of a string
 static std::string trim(const std::string & str) {
@@ -132,14 +131,14 @@ int main(int argc, char ** argv) {
     } else {
         // Output each line of the input params.prompts vector and copy to k_prompts
         int index = 0;
-        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file);
+        printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str());
 
         std::vector<std::string> prompts = split_string(params.prompt, '\n');
         for (const auto& prompt : prompts) {
             k_prompts.resize(index + 1);
             k_prompts[index] = prompt;
             index++;
-            printf("%3d prompt: %s\n", index, prompt);
+            printf("%3d prompt: %s\n", index, prompt.c_str());
         }
     }
 
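Note on the .c_str() changes in this hunk: printf is a C variadic function, so passing a std::string object where a %s conversion expects a const char * is undefined behavior; .c_str() supplies the NUL-terminated buffer instead. A minimal stand-alone sketch of the distinction (identifiers and values are illustrative only, not taken from this repository):

#include <cstdio>
#include <string>

int main() {
    std::string prompt_file = "prompts.txt";    // illustrative value

    // printf("file: %s\n", prompt_file);       // undefined behavior: %s needs a C string
    printf("file: %s\n", prompt_file.c_str());  // correct: pass the NUL-terminated buffer
    return 0;
}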
@@ -272,7 +271,7 @@ int main(int argc, char ** argv) {
         client.n_decoded = 0;
         client.i_batch = batch.n_tokens - 1;
 
-        LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
+        LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
 
         g_seq_id += 1;
 
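The only difference in this hunk is the ANSI escape sequence at the start of the message: \033[1m selects bold text, \033[31m selects a red foreground, and \033[0m resets attributes. A tiny stand-alone illustration, assuming an ANSI-capable terminal:

#include <cstdio>

int main() {
    printf("\033[1mbold client banner\033[0m\n");   // bold text
    printf("\033[31mred client banner\033[0m\n");   // red foreground
    return 0;
}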
@@ -399,7 +398,7 @@ int main(int argc, char ** argv) {
     print_date_time();
 
     LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
-    printf("external prompt file (if any): %s \n\n", params.prompt_file);
+    printf("external prompt file (if any): %s \n\n", params.prompt_file.c_str());
 
     LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
     LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);

llama.cpp: 10 changes
@@ -7587,14 +7587,14 @@ void llama_print_timings(struct llama_context * ctx) {
     const llama_timings timings = llama_get_timings(ctx);
 
     LLAMA_LOG_INFO("\n");
-    LLAMA_LOG_INFO("%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
-    LLAMA_LOG_INFO("%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: load time = %10.2f ms\n", __func__, timings.t_load_ms);
+    LLAMA_LOG_INFO("%s: sample time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-    LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-    LLAMA_LOG_INFO("%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
         __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-    LLAMA_LOG_INFO("%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+    LLAMA_LOG_INFO("%s: total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 
 void llama_reset_timings(struct llama_context * ctx) {
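These changes only widen the millisecond field from %8.2f to %10.2f, so timings with more digits before the decimal point still line up in the report. A small stand-alone sketch of the effect (the value is made up):

#include <cstdio>

int main() {
    double t_ms = 1234567.89;                   // hypothetical total time in ms
    printf("total time = %8.2f ms\n", t_ms);    // value overflows an 8-character field
    printf("total time = %10.2f ms\n", t_ms);   // fits within a 10-character field
    return 0;
}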