From dd88594585c4ae0f5217fbae1d915eaaa657b95b Mon Sep 17 00:00:00 2001
From: Ivan Stepanov
Date: Sun, 30 Apr 2023 23:16:41 +0300
Subject: [PATCH] Save prompt after initial prompt eval (fixes #1257)

---
 examples/main/main.cpp | 12 ++++++------
 llama.cpp              |  7 ++++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 990d0fa02..9f27c97c6 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -385,6 +385,12 @@ int main(int argc, char ** argv) {
 
         embd.clear();
 
+        // optionally save the session after prompt eval (for faster prompt loading next time)
+        if (!path_session.empty() && need_to_save_session) {
+            need_to_save_session = false;
+            llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
+        }
+
         if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
             // out of user input, sample next token
             const float temp = params.temp;
@@ -401,12 +407,6 @@
             const float mirostat_eta = params.mirostat_eta;
             const bool penalize_nl = params.penalize_nl;
 
-            // optionally save the session on first sample (for faster prompt loading next time)
-            if (!path_session.empty() && need_to_save_session) {
-                need_to_save_session = false;
-                llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
-            }
-
             llama_token id = 0;
 
             {
diff --git a/llama.cpp b/llama.cpp
index f8b4c8e46..4702d74b0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2724,10 +2724,11 @@ size_t llama_load_session_file(struct llama_context * ctx, const char * path_ses
     const size_t n_orig_state_size = llama_get_state_size(ctx);
     if (n_state_size != n_orig_state_size) {
         fprintf(stderr, "%s : failed to validate state size\n", __func__);
+        return 0;
     }
-    std::unique_ptr<uint8_t[]> state_data(new uint8_t[n_state_size]);
-    file.read_raw(state_data.get(), n_state_size);
-    return llama_set_state_data(ctx, state_data.get());
+    std::vector<uint8_t> state_data(n_state_size);
+    file.read_raw(state_data.data(), n_state_size);
+    return llama_set_state_data(ctx, state_data.data());
 }
 
 size_t llama_save_session_file(struct llama_context * ctx, const char * path_session, const llama_token * tokens, size_t n_token_count) {
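
---

Note (illustrative sketch, not part of the patch): the first two hunks move the `llama_save_session_file` call so it runs right after the prompt batch is evaluated, instead of waiting for the first sampled token. A minimal, self-contained version of that call pattern is below. `save_session_once` is a hypothetical helper for illustration; the check treating a return value of 0 as failure is an assumption borrowed from the `return 0;` error path this patch adds to `llama_load_session_file`.

    #include <cstdio>
    #include <string>
    #include <vector>

    #include "llama.h"

    // Hypothetical helper mirroring the call site moved in main.cpp: persist
    // the tokens evaluated so far together with the context state, so a later
    // run with the same model and prompt can skip the initial prompt eval.
    static void save_session_once(llama_context * ctx,
                                  const std::string & path_session,
                                  const std::vector<llama_token> & session_tokens,
                                  bool & need_to_save_session) {
        if (path_session.empty() || !need_to_save_session) {
            return;
        }
        need_to_save_session = false; // save at most once per run

        // signature as in this snapshot:
        //   size_t llama_save_session_file(ctx, path, tokens, n_token_count)
        // ASSUMPTION: a return of 0 signals failure, by symmetry with the
        // load path's new `return 0;` error case.
        if (llama_save_session_file(ctx, path_session.c_str(),
                                    session_tokens.data(), session_tokens.size()) == 0) {
            fprintf(stderr, "%s : failed to save session file '%s'\n",
                    __func__, path_session.c_str());
        }
    }

Saving at this point also means the session file is written even when the user interrupts generation before the first token is sampled, which is the behavior #1257 asks for.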