From de5bf5bf68117b78fe60c3c727a47108382959db Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Sun, 21 May 2023 05:20:56 -0600
Subject: [PATCH] Some improvements to loading the session with --prompt-cache

1. Currently the --seed parameter is ignored when loading a cached prompt.
   However, a very common use case is to save a prompt and then try
   several generation attempts with different seeds.

2. When loading a cached prompt from a session, you have to specify the
   prompt again. Even worse, if you forget to enter a prompt, your cached
   prompt gets overwritten by the blank one.
---
 examples/main/main.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 47b418d97..69507cbc0 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -134,8 +134,6 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    // Add a space in front of the first character to match OG llama tokenizer behavior
-    params.prompt.insert(0, 1, ' ');
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
@@ -155,6 +153,9 @@ int main(int argc, char ** argv) {
                 return 1;
             }
             session_tokens.resize(n_token_count_out);
+            if (params.seed != -1) {
+                llama_set_rng_seed(ctx, params.seed);
+            }
 
             fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size());
         } else {
@@ -163,7 +164,16 @@ int main(int argc, char ** argv) {
     }
 
     // tokenize the prompt
-    auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> embd_inp;
+
+    if (params.prompt.size() > 0 || session_tokens.size() == 0) {
+        // Add a space in front of the first character to match OG llama tokenizer behavior
+        params.prompt.insert(0, 1, ' ');
+
+        embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    } else {
+        embd_inp = session_tokens;
+    }
 
     const int n_ctx = llama_n_ctx(ctx);
 
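
A usage sketch of the workflow this patch enables (the model path and
prompt below are illustrative; -m, -p, --prompt-cache and --seed are
existing flags of the main example):

    # First run: evaluate the prompt, cache it, and generate with seed 1
    ./main -m models/7B/ggml-model.bin --prompt-cache prompt.cache \
        -p "Once upon a time" --seed 1

    # Later runs: reuse the cached prompt (no -p needed) with a new seed
    ./main -m models/7B/ggml-model.bin --prompt-cache prompt.cache --seed 2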