Add timing/ETA

Gary Linscott 2023-03-21 07:29:23 -07:00
parent 2f8ab68d72
commit 35ae689f78

@@ -801,10 +801,16 @@ void perplexity(const gpt_vocab &vocab, const llama_model &model, const gpt_para
         int end = start + params.n_ctx - 1;
         std::vector<gpt_vocab::id> embd(tokens.begin() + start, tokens.begin() + end);
         std::vector<float> logits;
+        auto start_t = std::chrono::high_resolution_clock::now();
         if (!llama_eval(model, params.n_threads, 0, embd, logits, mem_per_token, true)) {
             fprintf(stderr, "Failed to predict\n");
             return;
         }
+        auto end_t = std::chrono::high_resolution_clock::now();
+        if (i == 0) {
+            double seconds = std::chrono::duration<double>(end_t - start_t).count();
+            printf("%.2f seconds per pass - ETA %.2f hours\n", seconds, (seconds * seq_count) / (60.0*60.0));
+        }
         // We get the logits for all the tokens in the context window (params.n_ctx)
         // from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity,
         // calculate the perplexity over the last half the window (so the model always has
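
For context, the added lines are the standard std::chrono interval-timing pattern: measure one evaluation pass with a clock, then extrapolate a total runtime across all passes. Below is a minimal, self-contained sketch of the same idea; the sleep stands in for the llama_eval call, and seq_count here is a made-up pass count for illustration, not a value taken from this commit.

#include <chrono>
#include <cstdio>
#include <thread>

int main() {
    // Hypothetical number of perplexity passes; in the real tool this is
    // derived from the tokenized corpus size and the context length.
    const int seq_count = 655;

    // Time one representative pass.
    auto start_t = std::chrono::high_resolution_clock::now();
    std::this_thread::sleep_for(std::chrono::milliseconds(100)); // stand-in for one llama_eval call
    auto end_t = std::chrono::high_resolution_clock::now();

    // duration<double> converts the clock's native tick period to seconds.
    double seconds = std::chrono::duration<double>(end_t - start_t).count();

    // Each pass costs roughly the same, so total runtime is about seconds * seq_count.
    printf("%.2f seconds per pass - ETA %.2f hours\n",
           seconds, (seconds * seq_count) / (60.0 * 60.0));
    return 0;
}

One design note grounded in the diff itself: the ETA is printed only on the first iteration (i == 0), so it reflects a single sample and will not update if per-pass time drifts. For interval timing, std::chrono::steady_clock is generally the safer choice, since high_resolution_clock is permitted to alias the non-monotonic system_clock.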