From 7972929a3b89d95f4563ca820043a2e6f8288426 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Wed, 6 Dec 2023 09:37:04 -0500
Subject: [PATCH] now getting response from python

---
 embedding.py           |  3 ++-
 examples/main/main.cpp | 27 +++++++++++++++++++--------
 plugin_python.cpp      | 12 +++++++++---
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/embedding.py b/embedding.py
index 32977492f..d6ff4fc2b 100644
--- a/embedding.py
+++ b/embedding.py
@@ -1 +1,2 @@
-print("hello llama.cpp")
+print("hello llama.cpp" + llm_input)
+llm_output = "Is it because of your mother that " + llm_input + "?";
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 7bc7f012b..6e6965817 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -32,6 +32,7 @@
 #endif
 
 #include "print.hpp"
+#include "plugin_python.hpp"
 
 static llama_context ** g_ctx;
 static llama_model ** g_model;
@@ -130,7 +131,7 @@ int main(int argc, char ** argv) {
     // TODO: Dump params ?
     //LOG("Params perplexity: %s\n", LOG_TOSTR(params.perplexity));
 
-    print_fields(params);
+    //print_fields(params);
 
     // save choice to use color for later
     // (note for later: this is a slightly awkward choice)
@@ -248,7 +249,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    print_fields(*model);
+    //print_fields(*model);
 
     if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
@@ -293,7 +294,7 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
         return 1;
     }
-    print_fields(*ctx);
+    //print_fields(*ctx);
     //print_fields(session_tokens);
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
@@ -383,7 +384,7 @@ int main(int argc, char ** argv) {
             LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
         }
 
-        print_fields(*ctx_guidance);
+        //print_fields(*ctx_guidance);
     }
 
 
@@ -495,7 +496,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_guidance;
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
-    print_fields(*ctx_sampling);
+    //print_fields(*ctx_sampling);
 
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
@@ -532,7 +533,7 @@ int main(int argc, char ** argv) {
                 LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
                     n_past, n_left, n_ctx, params.n_keep, n_discard);
 
-                print_fields(*ctx);
+                //print_fields(*ctx);
                 llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1);
                 llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 
@@ -649,7 +650,7 @@ int main(int argc, char ** argv) {
             }
 
             const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
-            print_fields(id);
+            //print_fields(id);
             llama_sampling_accept(ctx_sampling, ctx, id, true);
 
             LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());
@@ -686,6 +687,8 @@ int main(int argc, char ** argv) {
                 const std::string token_str = llama_token_to_piece(ctx, id);
                 printf("TOKEN:%s\n", token_str.c_str());
 
+                //print_fields(id);
+
                 if (embd.size() > 1) {
                     input_tokens.push_back(id);
                 } else {
@@ -700,12 +703,20 @@ int main(int argc, char ** argv) {
             console::set_display(console::reset);
         }
 
+        // just print the whole thing
+        const std::string last_output1 = output_ss.str();
+        printf("%s",last_output1.c_str());
+        const std::string last_output = process_output_plugin(last_output1);
+        printf("%s",last_output.c_str());
+
         // if not currently processing queued inputs;
         if ((int) embd_inp.size() <= n_consumed) {
             // check for reverse prompt in the last n_prev tokens
             if (!params.antiprompt.empty()) {
                 const int n_prev = 32;
-                const std::string last_output = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+                const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+                // now plugin the python :
+                const std::string last_output = process_output_plugin(last_output1);
                 is_antiprompt = false;
 
                 // Check if each of the reverse prompts appears at the end of the output.
diff --git a/plugin_python.cpp b/plugin_python.cpp
index 324b18d56..6900a95b0 100644
--- a/plugin_python.cpp
+++ b/plugin_python.cpp
@@ -26,7 +26,7 @@ using namespace boost::python;
 #endif
 
-int call_python()
+std::string process_output_plugin(const std::string input)
 {
   try
     {
       PyImport_AppendInittab((char*)"mymodule", INIT_MODULE);
@@ -36,12 +36,18 @@ int call_python()
       object mymodule = import("mymodule");
 
       main_namespace["precreated_object"] = Base("created on C++ side");
+      main_namespace["llm_input"] = input;
       exec_file("embedding.py", main_namespace, main_namespace);
+
+      boost::python::object llm_output = main_namespace["llm_output"];
+      std::string message = boost::python::extract<std::string>(llm_output);
+
+      return message;
+
     }
   catch (error_already_set& e)
     {
       PyErr_PrintEx(0);
-      return 1;
+      return "";
     }
-  return 0;
 }
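Note on plugin_python.hpp: main.cpp now includes "plugin_python.hpp", but that
header is not part of this patch. For the new calls to compile it presumably
only needs to declare the plugin entry point defined in plugin_python.cpp; a
minimal sketch (header contents assumed, not taken from the repository):

    // plugin_python.hpp -- assumed declaration matching plugin_python.cpp
    #pragma once
    #include <string>

    // Runs embedding.py with `input` bound to llm_input in the interpreter
    // and returns whatever the script left in llm_output ("" on Python error).
    std::string process_output_plugin(const std::string input);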
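The plugin follows the usual Boost.Python embedding pattern: push a C++ string
into the __main__ namespace, exec_file() the script, then extract<> a string
back out. Below is a minimal, self-contained sketch of that round trip; the
helper name run_embedding_script is hypothetical, and it omits the
PyImport_AppendInittab/mymodule registration and the precreated Base object
that plugin_python.cpp also sets up.

    // Sketch only: C++ -> Python -> C++ string round trip via Boost.Python.
    #include <boost/python.hpp>
    #include <iostream>
    #include <string>

    static std::string run_embedding_script(const std::string & input) {
        namespace py = boost::python;
        try {
            Py_Initialize();                                    // start the interpreter
            py::object main_module    = py::import("__main__");
            py::object main_namespace = main_module.attr("__dict__");

            main_namespace["llm_input"] = input;                // C++ -> Python
            py::exec_file("embedding.py", main_namespace, main_namespace);

            py::object llm_output = main_namespace["llm_output"];
            return py::extract<std::string>(llm_output);        // Python -> C++
        } catch (py::error_already_set &) {
            PyErr_PrintEx(0);                                   // print the Python traceback
            return "";
        }
    }

    int main() {
        std::cout << run_embedding_script("the model said hello") << std::endl;
        return 0;
    }

One consequence of this pattern as used in the patch: process_output_plugin()
re-runs embedding.py on every call from the generation loop, so edits to the
script take effect without recompiling, at the cost of re-executing it for
each processed chunk of output.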