From c3a7f848f2d0987c1fea9854831d060c90ca42ba Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 12:07:07 +0200 Subject: [PATCH] fix interactive prompt escaping and fix server infill leading space handling --- examples/infill/infill.cpp | 21 +++++++++++++++++++-- examples/server/server.cpp | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 47ece81c6..d994de5e8 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -639,10 +639,27 @@ int main(int argc, char ** argv) { buffer.clear(); // done taking input, reset color console::set_display(console::reset); + + if (params.escape) { + //process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here + process_escapes(params.input_prefix); + process_escapes(params.input_suffix); + } + suff_rm_leading_spc = params.escape; + if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + params.input_suffix.erase(0, 1); + suff_rm_leading_spc = false; + } // tokenize new prefix and suffix - std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, add_bos); - std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, add_bos); + std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); + std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); + if (suff_rm_leading_spc && inp_sfx[0] == space_token) { + inp_sfx.erase(inp_sfx.begin()); + } inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); + if (add_bos) { + inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); + } inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx)); embd_inp = inp_pfx; embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 667f5db71..dc57e55d9 100644 --- 
a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,7 +344,7 @@ struct llama_server_context void loadInfill() { - bool suff_rm_leading_spc = params.escape; + bool suff_rm_leading_spc = true; if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { params.input_suffix.erase(0, 1); suff_rm_leading_spc = false;