Some improvements to loading the session with --prompt-cache
1. Currently the --seed parameter is ignored when the prompt is loaded from the cache. However, a very common use case is to save a prompt once and then make several generation attempts with different seeds.
2. When loading a cached prompt from a session, you still have to specify the prompt again. Even worse, if you forget to enter a prompt, the cached prompt gets overwritten by the blank one.
parent ac7876ac20
commit de5bf5bf68
1 changed file with 13 additions and 3 deletions
@@ -134,8 +134,6 @@ int main(int argc, char ** argv) {
         return 0;
     }
 
-    // Add a space in front of the first character to match OG llama tokenizer behavior
-    params.prompt.insert(0, 1, ' ');
 
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
@@ -155,6 +153,9 @@ int main(int argc, char ** argv) {
                 return 1;
             }
             session_tokens.resize(n_token_count_out);
+            if (params.seed != -1) {
+                llama_set_rng_seed(ctx, params.seed);
+            }
 
             fprintf(stderr, "%s: loaded a session with prompt size of %d tokens\n", __func__, (int) session_tokens.size());
         } else {
@@ -163,7 +164,16 @@ int main(int argc, char ** argv) {
     }
 
     // tokenize the prompt
-    auto embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> embd_inp;
+
+    if (params.prompt.size() > 0 || session_tokens.size() == 0) {
+        // Add a space in front of the first character to match OG llama tokenizer behavior
+        params.prompt.insert(0, 1, ' ');
+
+        embd_inp = ::llama_tokenize(ctx, params.prompt, true);
+    } else {
+        embd_inp = session_tokens;
+    }
 
     const int n_ctx = llama_n_ctx(ctx);
 
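Taken together, the hunks above amount to the logic sketched below. This is an illustrative extract only: in the actual patch the code sits inline in main(), and choose_input is a hypothetical helper name introduced here just to make the sketch self-contained. It assumes the llama.cpp example helpers (gpt_params and ::llama_tokenize from common.h) are available.

#include <string>
#include <vector>

#include "common.h"  // gpt_params, ::llama_tokenize
#include "llama.h"   // llama_context, llama_token, llama_set_rng_seed

// Hypothetical helper for illustration; mirrors the new post-load logic in main().
static std::vector<llama_token> choose_input(llama_context * ctx, gpt_params & params,
                                             const std::vector<llama_token> & session_tokens) {
    // Re-apply --seed after a cached session has been restored (in the patch this
    // happens right after the session file is loaded), so that repeated runs
    // against the same cached prompt can produce different generations.
    if (!session_tokens.empty() && params.seed != -1) {
        llama_set_rng_seed(ctx, params.seed);
    }

    std::vector<llama_token> embd_inp;
    if (params.prompt.size() > 0 || session_tokens.size() == 0) {
        // A prompt was supplied (or nothing was cached): tokenize it as before.
        // Add a space in front of the first character to match OG llama tokenizer behavior.
        params.prompt.insert(0, 1, ' ');
        embd_inp = ::llama_tokenize(ctx, params.prompt, true);
    } else {
        // No prompt given but a session was loaded: reuse the cached tokens
        // instead of overwriting the cache with a blank prompt.
        embd_inp = session_tokens;
    }
    return embd_inp;
}

With this in place, a typical workflow would be to run once with --prompt-cache and a full prompt to populate the cache, then rerun with only --prompt-cache and different --seed values, without having to repeat (or risk blanking) the prompt.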