Enable external file and add datestamp

Author: pudepiedj 2023-09-30 16:35:28 +01:00
parent 40e07a60f9
commit 2a5c27053e
5 changed files with 83 additions and 3 deletions

ParallelQuestions.txt (new file, 32 lines)

@@ -0,0 +1,32 @@
What do you know about Hobbits?
What is quantum field theory?
Why did the chicken cross the road?
Who is the president of the United States?
How do I run CMake on MacOS?
Do you agree that C++ is a really finicky language compared with Python3?
Is it a good idea to invest in technology?
Do you like Wagner's Ring?
Do you think this file input option is really neat?
What should we all do about climate change?
Is time-travel possible within the laws of current physics?
Is it like anything to be a bat?
Once the chicken has crossed the road, does it try to go back?
Who is the greatest of all musical composers?
What is art?
Is there life elsewhere in the universe?
What is intelligence?
What is the difference between knowledge and intelligence?
Will religion ever die?
Do we understand ourselves?
What is the best way to cook eggs?
If you cannot see things, on what basis do you evaluate them?
What is belief?
Is professional sport a good or bad influence on human behaviour?
Is capital punishment immoral?
Should we care about other people?
Who am I?
Which sense would you surrender if you could?
Was Henry Ford a hero or a villain?
Do we need leaders?
What is nucleosynthesis?
Who is the greatest scientist of all time so far?
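
For context, one plausible way to exercise this file once the project is built (the model path and parallelism settings below are illustrative, not from the commit; -f is the prompt-file flag handled in common/common.cpp below):

    ./build/bin/parallel -m models/llama-2-7b.Q4_0.gguf -f ParallelQuestions.txt -np 8 -ns 32 -cb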

cmake_all.sh (new executable file, 6 lines)

@@ -0,0 +1,6 @@
#!/bin/sh
# rebuild llama.cpp from a clean CMake build tree
cd llama.cpp
rm -rf build   # -f: do not fail if build/ does not exist yet
cmake -B build
cd build
cmake --build . --config Release
cd ..

common/common.cpp (modified)

@@ -167,6 +167,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                invalid_param = true;
                break;
            }
            // store the external file name in params
            params.prompt_file = argv[i];
            std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
            if (params.prompt.back() == '\n') {
                params.prompt.pop_back();
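
For orientation, a minimal sketch of the surrounding -f/--file handler in gpt_params_parse, reconstructed from the hunk above (the ifstream setup sits just before the lines shown; this is not verbatim from the commit):

    if (arg == "-f" || arg == "--file") {
        if (++i >= argc) {
            invalid_param = true;
            break;
        }
        std::ifstream file(argv[i]);
        if (!file) {
            invalid_param = true;
            break;
        }
        // store the external file name in params (the line this commit adds)
        params.prompt_file = argv[i];
        // append the whole file to params.prompt
        std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
        // drop a single trailing newline so the last prompt is not empty
        if (params.prompt.back() == '\n') {
            params.prompt.pop_back();
        }
    }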

common/common.h (modified)

@@ -79,6 +79,7 @@ struct gpt_params {
    std::string model_draft       = "";        // draft model for speculative decoding
    std::string model_alias       = "unknown"; // model alias
    std::string prompt            = "";
    std::string prompt_file      = "";         // store for external prompt file name
    std::string path_prompt_cache = "";        // path to file for saving/loading prompt eval state
    std::string input_prefix      = "";        // string to prefix user inputs with
    std::string input_suffix      = "";        // string to suffix user inputs with

examples/parallel/parallel.cpp (modified)

@@ -10,6 +10,8 @@
#include <cstdio>
#include <string>
#include <vector>
#include <ctime>
#include <iomanip>
#include <iostream> // needed for std::cout in printDateTime (assumed missing; not in the original hunk)
#include <sstream>  // needed for std::istringstream in splitString (assumed missing; not in the original hunk)

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
@@ -70,6 +72,22 @@ struct client {
    std::vector<llama_token> tokens_prev;
};

// print a magenta datestamp header (note: the string returned by std::ctime already ends with '\n')
static void printDateTime() {
    std::time_t currentTime = std::time(nullptr);
    std::cout << "\n\033[35mRUN PARAMETERS as at \033[0m" << std::ctime(&currentTime);
}

// split a delimited string into a vector of substrings (used below to turn the prompt file into one prompt per line)
static std::vector<std::string> splitString(const std::string& input, char delimiter) {
    std::vector<std::string> tokens;
    std::istringstream stream(input);
    std::string token;
    while (std::getline(stream, token, delimiter)) {
        tokens.push_back(token);
    }
    return tokens;
}
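
A quick illustration of splitString (a hypothetical standalone snippet, not part of the commit):

    // "What is art?\nWho am I?" splits into two entries; a trailing '\n' adds no empty entry
    std::vector<std::string> questions = splitString("What is art?\nWho am I?", '\n');
    // questions[0] == "What is art?", questions[1] == "Who am I?"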

int main(int argc, char ** argv) {
    srand(1234);
@@ -104,6 +122,23 @@ int main(int argc, char ** argv) {
    params.logits_all = true;
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
    // load the prompts from an external file if there are any
    if (params.prompt.empty()) {
        std::cout << "\n\033[32mNo new questions so proceed with built-in defaults.\033[0m";
    } else {
        // output each line of the input params.prompt and copy to k_prompts
        int index = 0;
        std::cout << "\n\033[32mNow printing the external prompt file " << params.prompt_file << "\033[0m\n\n";
        std::vector<std::string> prompts = splitString(params.prompt, '\n');
        for (const auto& prompt : prompts) {
            k_prompts.resize(index + 1);
            k_prompts[index] = prompt;
            index++;
            std::cout << std::setw(3) << std::right << index << " prompt: " << prompt << std::endl;
        }
    }
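
As a stylistic aside, the resize-and-assign loop above is equivalent to clearing k_prompts and using push_back, which avoids hand-tracking the index (an alternative sketch, not what the commit does):

    k_prompts.clear(); // the first resize(1) above already truncates the built-in defaults
    for (const auto& prompt : prompts) {
        k_prompts.push_back(prompt);
        std::cout << std::setw(3) << std::right << k_prompts.size() << " prompt: " << prompt << std::endl;
    }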
    fprintf(stderr, "\n\n");
    fflush(stderr);
@@ -336,8 +371,8 @@ int main(int argc, char ** argv) {
            const auto t_main_end = ggml_time_us();
-           LOG_TEE("\033[1mClient %3d, seq %4d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \n\nInput: %s\nResponse: %s\n\n",
-               client.id, client.seq_id, client.n_prompt, client.n_decoded,
+           LOG_TEE("\033[31mClient %3d, seq %3d/%3d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \nInput: %s\n\033[35mResponse: %s\033[0m\n\n",
+               client.id, client.seq_id, n_seq, client.n_prompt, client.n_decoded,
            (t_main_end - client.t_start_prompt) / 1e6,
            (double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6,
            n_cache_miss,
@@ -357,7 +392,11 @@ int main(int argc, char ** argv) {
    const auto t_main_end = ggml_time_us();
    LOG_TEE("\n\n");
    printDateTime();
    LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
    LOG_TEE("\n");
    LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt              ) / (t_main_end - t_main_start) * 1e6);
    LOG_TEE("Total gen tokens:    %6d, speed: %5.2f t/s\n", n_total_gen,    (double) (n_total_gen                 ) / (t_main_end - t_main_start) * 1e6);
    LOG_TEE("Total speed (AVG):   %6s  speed: %5.2f t/s\n", "",             (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6);