log distribution after prompt tokens

Maël Kerbiriou 2023-03-16 18:58:59 +01:00
parent 4547848743
commit 03755743cf
3 changed files with 26 additions and 0 deletions


@@ -1017,6 +1017,11 @@ int main(int argc, char ** argv) {
             // decrement remaining sampling budget
             --remaining_tokens;
         } else {
+            if(log_file) {
+                const int n_vocab = model.hparams.n_vocab;
+                const float temp = params.temp;
+                print_output(vocab, logits.data() + (logits.size() - n_vocab), temp);
+            }
             // some user input remains from prompt or interaction, forward it to processing
             while (embd_inp.size() > input_consumed) {
                 embd.push_back(embd_inp[input_consumed]);

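Note on the call site above: the logits buffer appears to hold one row of n_vocab values per evaluated token, so logits.data() + (logits.size() - n_vocab) points at the row for the most recent token, which is the distribution being logged. A minimal sketch of the pointer arithmetic, with hypothetical sizes and values rather than code from this branch:

    #include <vector>

    int main() {
        const int n_vocab = 4;                  // hypothetical vocab size
        std::vector<float> logits = {
            0.1f, 0.2f, 0.3f, 0.4f,             // row for token 0
            1.0f, 2.0f, 3.0f, 4.0f,             // row for token 1 (most recent)
        };
        // same pointer arithmetic as the call site above:
        const float * last_row = logits.data() + (logits.size() - n_vocab);
        // last_row[0..n_vocab) is {1.0f, 2.0f, 3.0f, 4.0f}
        return 0;
    }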

@@ -649,6 +649,21 @@ gpt_vocab::id sample_top_k_top_p(
     return sampled_tok_id;
 }
 
+gpt_vocab::id print_output(
+        const gpt_vocab & vocab,
+        const float * logits,
+        double temp) {
+    SoftMaxSampler probs;
+    probs.reset(vocab, logits, temp);
+    probs.top_k_sort();
+    probs.soft_max();
+    probs.print(log_file, vocab, logits, 16);
+    return probs.top();
+}
+
 size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) {
     const int nb = k / qk;

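The SoftMaxSampler used by print_output is defined elsewhere on this branch. As a reading aid only, here is a minimal sketch consistent with the five calls above (reset, top_k_sort, soft_max, print, top); the field names, the sorting and softmax details, and the trimmed gpt_vocab stand-in are assumptions, not the branch's actual implementation:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Trimmed stand-in for the gpt_vocab in utils.h (only the field used here).
    struct gpt_vocab {
        using id    = int32_t;
        using token = std::string;
        std::map<id, token> id_to_token;
    };

    // Sketch only: mirrors the call sequence in print_output, not the real class.
    struct SoftMaxSampler {
        // (scaled logit, later overwritten by probability) paired with token id
        std::vector<std::pair<float, gpt_vocab::id>> cand;

        // Load every token's logit, scaled by 1/temp.
        void reset(const gpt_vocab & vocab, const float * logits, double temp) {
            cand.clear();
            for (const auto & kv : vocab.id_to_token) {
                cand.emplace_back((float) (logits[kv.first] / temp), kv.first);
            }
        }

        // Sort descending so the most likely candidates come first.
        void top_k_sort() {
            std::sort(cand.begin(), cand.end(),
                [](const auto & a, const auto & b) { return a.first > b.first; });
        }

        // Replace scaled logits with softmax probabilities (numerically stable).
        void soft_max() {
            if (cand.empty()) return;
            const float max_l = cand[0].first; // list is sorted descending
            float sum = 0.0f;
            for (auto & c : cand) { c.first = std::exp(c.first - max_l); sum += c.first; }
            for (auto & c : cand) { c.first /= sum; }
        }

        // Log the n_print most probable tokens: id, raw logit, probability, text.
        void print(FILE * f, const gpt_vocab & vocab, const float * logits, int n_print) const {
            for (int i = 0; i < n_print && i < (int) cand.size(); ++i) {
                const gpt_vocab::id id = cand[i].second;
                fprintf(f, "%6d %8.3f %6.3f '%s'\n",
                    id, logits[id], cand[i].first, vocab.id_to_token.at(id).c_str());
            }
        }

        // Most probable token id (valid once the list has been sorted).
        gpt_vocab::id top() const { return cand.empty() ? 0 : cand[0].second; }
    };

Under this reading, the 16 passed by print_output would cap the log at the top 16 candidates per step, and log_file is assumed to be a FILE* opened earlier in main.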

@@ -106,6 +106,12 @@ gpt_vocab::id sample_top_k_top_p(
         double temp,
         std::mt19937 & rng);
 
+// Print would-be output after prompt samples
+gpt_vocab::id print_output(
+        const gpt_vocab & vocab,
+        const float * logits,
+        double temp);
+
 //
 // Quantization
 //