From 156d70b82b607102876603f6902209460936a2f7 Mon Sep 17 00:00:00 2001 From: KerfuffleV2 Date: Thu, 25 May 2023 00:00:54 -0600 Subject: [PATCH] Always set RNG seed when restoring cached prompt in main example. Add a note in the main example README about how restoring a prompt doesn't imply restoring the exact session state. --- examples/main/README.md | 2 +- examples/main/main.cpp | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/examples/main/README.md b/examples/main/README.md index 7c03f92c8..e71ba6173 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -272,7 +272,7 @@ These options help improve the performance and memory usage of the LLaMA models. ### Prompt Caching -- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. +- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a seed, you are not guaranteed to get the same sequence of tokens as the original generation. ### Quantization diff --git a/examples/main/main.cpp b/examples/main/main.cpp index eeb20162a..c7c591537 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -85,9 +85,6 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); - // Save the initial seed parameter before overwriting it so it's possible to determine whether - // the user supplied a seed or not. This is useful when loading saved sessions. 
- int32_t initial_seed = params.seed; if (params.seed < 0) { params.seed = time(NULL); } @@ -156,12 +153,7 @@ int main(int argc, char ** argv) { return 1; } session_tokens.resize(n_token_count_out); - if (initial_seed != -1) { - fprintf(stderr, "%s: seed argument overrides session file RNG state, will now use seed: %d\n", __func__, params.seed); - llama_set_rng_seed(ctx, params.seed); - } else { - fprintf(stderr, "%s: using RNG state from loaded session file rather than seed\n", __func__); - } + llama_set_rng_seed(ctx, params.seed); fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size()); } else {