Strided perplexity (#2714)

* Implementing strided computation of perplexity
* Alternative way to output PPL results

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2023-08-23 12:56:42 +03:00 · 2023-08-23 12:56:42 +03:00 · 62959e740e
commit 62959e740e
parent 7f7ddd5002
3 changed files with 141 additions and 1 deletion
--- a/common/common.h
+++ b/common/common.h
@@ -64,6 +64,10 @@ struct gpt_params {
    std::string lora_adapter = "";  // lora adapter path
    std::string lora_base    = "";  // base model path for the lora adapter

+    int  ppl_stride        = 0;     // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
+    int  ppl_output_type   = 0;     // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
+                                    //                                       (which is more convenient to use for plotting)
+                                    //
    bool hellaswag         = false; // compute HellaSwag score over random tasks from datafile supplied in prompt
    size_t hellaswag_tasks = 400;   // number of tasks to use when computing the HellaSwag score