Add timing/ETA

2023-03-21 07:29:23 -07:00 · 2023-03-21 07:29:23 -07:00 · 35ae689f78
commit 35ae689f78
parent 2f8ab68d72
1 changed file with 6 additions and 0 deletions
--- a/main.cpp
+++ b/main.cpp
@@ -801,10 +801,16 @@ void perplexity(const gpt_vocab &vocab, const llama_model &model, const gpt_para
        int end = start + params.n_ctx - 1;
        std::vector<gpt_vocab::id> embd(tokens.begin() + start, tokens.begin() + end);
        std::vector<float> logits;
        auto start_t = std::chrono::high_resolution_clock::now();
        if (!llama_eval(model, params.n_threads, 0, embd, logits, mem_per_token, true)) {
            fprintf(stderr, "Failed to predict\n");
            return;
        }
        auto end_t = std::chrono::high_resolution_clock::now();
        if (i == 0) {
            double seconds = std::chrono::duration<double>(end_t - start_t).count();
            printf("%.2f seconds per pass - ETA %.2f hours\n", seconds, (seconds * seq_count) / (60.0*60.0));
        }
        // We get the logits for all the tokens in the context window (params.n_ctx)
        // from llama_eval above.  Now, based on https://huggingface.co/docs/transformers/perplexity,
        // calculate the perplexity over the last half the window (so the model always has