now getting response from python

mike dupont 2023-12-06 09:37:04 -05:00
parent 1c861466dc
commit 7972929a3b
3 changed files with 30 additions and 12 deletions

View file

@@ -1 +1,2 @@
-print("hello llama.cpp")
+print("hello llama.cpp" + llm_input)
+llm_output = "Is it because of your mother that " + llm_input + "?";

View file

@@ -32,6 +32,7 @@
 #endif
 #include "print.hpp"
+#include "plugin_python.hpp"
 static llama_context ** g_ctx;
 static llama_model ** g_model;
@@ -130,7 +131,7 @@ int main(int argc, char ** argv) {
 // TODO: Dump params ?
 //LOG("Params perplexity: %s\n", LOG_TOSTR(params.perplexity));
-print_fields(params);
+//print_fields(params);
 // save choice to use color for later
 // (note for later: this is a slightly awkward choice)
@@ -248,7 +249,7 @@ int main(int argc, char ** argv) {
 std::vector<llama_token> embd_inp;
-print_fields(*model);
+//print_fields(*model);
 if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
 LOG("tokenize the prompt\n");
@@ -293,7 +294,7 @@ int main(int argc, char ** argv) {
 LOG_TEE("%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
 return 1;
 }
-print_fields(*ctx);
+//print_fields(*ctx);
 //print_fields(session_tokens);
 // debug message about similarity of saved session, if applicable
 size_t n_matching_session_tokens = 0;
@@ -383,7 +384,7 @@ int main(int argc, char ** argv) {
 LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
 }
-print_fields(*ctx_guidance);
+//print_fields(*ctx_guidance);
 }
@@ -495,7 +496,7 @@ int main(int argc, char ** argv) {
 std::vector<llama_token> embd_guidance;
 struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
-print_fields(*ctx_sampling);
+//print_fields(*ctx_sampling);
 while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
 // predict
@@ -532,7 +533,7 @@ int main(int argc, char ** argv) {
 LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
 n_past, n_left, n_ctx, params.n_keep, n_discard);
-print_fields(*ctx);
+//print_fields(*ctx);
 llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1);
 llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
@@ -649,7 +650,7 @@ int main(int argc, char ** argv) {
 }
 const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
-print_fields(id);
+//print_fields(id);
 llama_sampling_accept(ctx_sampling, ctx, id, true);
 LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());
@@ -686,6 +687,8 @@ int main(int argc, char ** argv) {
 const std::string token_str = llama_token_to_piece(ctx, id);
 printf("TOKEN:%s\n", token_str.c_str());
+//print_fields(id);
 if (embd.size() > 1) {
 input_tokens.push_back(id);
 } else {
@@ -700,12 +703,20 @@ int main(int argc, char ** argv) {
 console::set_display(console::reset);
 }
+// just print the whole thing
+const std::string last_output1 = output_ss.str();
+printf("%s",last_output1.c_str());
+const std::string last_output = process_output_plugin(last_output1);
+printf("%s",last_output.c_str());
 // if not currently processing queued inputs;
 if ((int) embd_inp.size() <= n_consumed) {
 // check for reverse prompt in the last n_prev tokens
 if (!params.antiprompt.empty()) {
 const int n_prev = 32;
-const std::string last_output = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+// now plugin the python :
+const std::string last_output = process_output_plugin(last_output1);
 is_antiprompt = false;
 // Check if each of the reverse prompts appears at the end of the output.

View file

@@ -26,7 +26,7 @@ using namespace boost::python;
 #endif
-int call_python()
+std::string process_output_plugin(const std::string input)
 {
 try {
 PyImport_AppendInittab((char*)"mymodule", INIT_MODULE);
@@ -36,12 +36,18 @@ int call_python()
 object mymodule = import("mymodule");
 main_namespace["precreated_object"] = Base("created on C++ side");
+main_namespace["llm_input"] = input;
 exec_file("embedding.py", main_namespace, main_namespace);
+boost::python::object llm_output = main_namespace["llm_output"];
+std::string message = boost::python::extract<std::string>(llm_output);
+return message;
 } catch (error_already_set& e) {
 PyErr_PrintEx(0);
-return 1;
+return "";
 }
-return 0;
 }
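
For context, here is the same llm_input/llm_output handshake as a self-contained Boost.Python sketch: seed the script's namespace, run embedding.py, read the result back. It is illustrative only: it assumes Boost.Python is available and an embedding.py like the one in the first file sits in the working directory, it initializes the interpreter directly instead of using the commit's INIT_MODULE/mymodule setup, and run_embedding_script is a made-up name rather than part of the commit.

#include <boost/python.hpp>
#include <iostream>
#include <string>

namespace py = boost::python;

// Hypothetical helper: seed the script's globals with llm_input, run
// embedding.py, then read llm_output back out of the same namespace.
std::string run_embedding_script(const std::string & input) {
    py::object main_module    = py::import("__main__");
    py::object main_namespace = main_module.attr("__dict__");
    main_namespace["llm_input"] = input;                           // visible to the script as a global
    py::exec_file("embedding.py", main_namespace, main_namespace); // run the script in that namespace
    return py::extract<std::string>(main_namespace["llm_output"]); // whatever the script assigned
}

int main() {
    Py_Initialize();                          // start the embedded interpreter
    try {
        std::cout << run_embedding_script("hello from C++") << std::endl;
    } catch (py::error_already_set &) {
        PyErr_Print();                        // surface the Python traceback
        return 1;
    }
    return 0;
}

Building it requires linking against Boost.Python and the Python runtime.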