Update llama.cpp to use instruct mode
This commit is contained in:
parent
660a4d5365
commit
edb52ab13c
2 changed files with 14 additions and 0 deletions
12
llama.cpp
12
llama.cpp
|
@@ -1176,6 +1176,18 @@ void llama_update_input(llama_context& ctx, const std::string& text)
|
|||
state.embd_inp.insert(state.embd_inp.end(), line_inp.begin(), line_inp.end());
|
||||
state.remaining_tokens -= line_inp.size();
|
||||
}
|
||||
/// @brief Updates the context and appends new input tokens (overloaded version)
|
||||
/// @param ctx
|
||||
/// @param tokens
|
||||
void llama_update_input(llama_context& ctx, const std::vector<gpt_vocab::id>& tokens)
|
||||
{
|
||||
llama_state& state = *ctx.state;
|
||||
llama_model& model = ctx.model;
|
||||
const gpt_params& params = ctx.params;
|
||||
|
||||
state.embd_inp.insert(state.embd_inp.end(), tokens.begin(), tokens.end());
|
||||
state.remaining_tokens -= tokens.size();
|
||||
}
|
||||
|
||||
/// @brief Ingests a batch of input tokens into the context
|
||||
/// @param ctx
|
||||
|
|
2
llama.h
2
llama.h
|
@@ -50,6 +50,8 @@ const std::vector<gpt_vocab::id> llama_tokenize_text(const llama_context& ctx, c
|
|||
// Queues the BOS (beginning-of-sequence) token into the model input
// (presumed from the name — confirm against the definition in llama.cpp)
void llama_add_bos(llama_context& ctx);

// Queues up input text to the model input
void llama_update_input(llama_context& ctx, const std::string& text);

// Queues up input tokens to the model input
void llama_update_input(llama_context& ctx, const std::vector<gpt_vocab::id>& tokens);

// Ingests input previously added using llama_update_input()
void llama_ingest_input_batch(llama_context& ctx);

// Ingests all input previously added using llama_update_input() in multiple batches
|
|
Loading…
Add table
Add a link
Reference in a new issue