now getting response from python
parent 1c861466dc · commit 7972929a3b
3 changed files with 30 additions and 12 deletions
@@ -1 +1,2 @@
-print("hello llama.cpp")
+print("hello llama.cpp" + llm_input)
+llm_output = "Is it because of your mother that " + llm_input + "?";
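The two names used above, llm_input and llm_output, are not defined in the script itself: the C++ plugin (diffed at the bottom of this commit) injects llm_input into the interpreter namespace before the script runs and reads llm_output back afterwards. Below is a minimal standalone sketch of that round trip, assuming Boost.Python and a script named embedding.py in the working directory; apart from the llm_input/llm_output names and the exec_file/extract calls visible in the plugin diff, everything here (helper name, sample prompt) is illustrative.

    // round_trip.cpp -- standalone sketch of the C++ <-> embedding.py contract.
    // Assumptions: Boost.Python is available and "embedding.py" reads llm_input
    // and assigns llm_output, as in the hunk above.
    #include <boost/python.hpp>
    #include <iostream>
    #include <string>

    namespace py = boost::python;

    std::string run_plugin(const std::string & input) {   // hypothetical helper name
        try {
            py::object main_module    = py::import("__main__");
            py::object main_namespace = main_module.attr("__dict__");

            main_namespace["llm_input"] = input;                            // hand the text to Python
            py::exec_file("embedding.py", main_namespace, main_namespace);  // script sets llm_output
            return py::extract<std::string>(main_namespace["llm_output"]);  // read the reply back
        } catch (py::error_already_set &) {
            PyErr_Print();      // dump the Python traceback
            return "";          // same empty-string fallback as the plugin below
        }
    }

    int main() {
        Py_Initialize();        // the real plugin also registers "mymodule" via PyImport_AppendInittab
        std::cout << run_plugin("I keep dreaming about segfaults") << std::endl;
        return 0;
    }

Building such a sketch means linking against Boost.Python and libpython; the exact library names (for example -lboost_python312 -lpython3.12) vary by distribution.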
@@ -32,6 +32,7 @@
 #endif
 
 #include "print.hpp"
+#include "plugin_python.hpp"
 
 static llama_context ** g_ctx;
 static llama_model ** g_model;
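main.cpp only sees the plugin through this new header. plugin_python.hpp itself is not part of the diff, so the declaration below is an assumption about its shape, inferred from the definition in the last file of this commit.

    // plugin_python.hpp (assumed contents -- the header is not shown in this commit)
    #pragma once

    #include <string>

    // Runs embedding.py on the given text and returns whatever the script assigned
    // to llm_output; returns "" if the Python side raised an exception.
    std::string process_output_plugin(const std::string input);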
@@ -130,7 +131,7 @@ int main(int argc, char ** argv) {
 
     // TODO: Dump params ?
     //LOG("Params perplexity: %s\n", LOG_TOSTR(params.perplexity));
-    print_fields(params);
+    //print_fields(params);
 
     // save choice to use color for later
     // (note for later: this is a slightly awkward choice)
@@ -248,7 +249,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    print_fields(*model);
+    //print_fields(*model);
 
     if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
@@ -293,7 +294,7 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: error: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
         return 1;
     }
-    print_fields(*ctx);
+    //print_fields(*ctx);
     //print_fields(session_tokens);
     // debug message about similarity of saved session, if applicable
     size_t n_matching_session_tokens = 0;
@@ -383,7 +384,7 @@ int main(int argc, char ** argv) {
             LOG_TEE("%6d -> '%s'\n", guidance_inp[i], llama_token_to_piece(ctx, guidance_inp[i]).c_str());
         }
 
-        print_fields(*ctx_guidance);
+        //print_fields(*ctx_guidance);
 
 
     }
@@ -495,7 +496,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> embd_guidance;
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
-    print_fields(*ctx_sampling);
+    //print_fields(*ctx_sampling);
 
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
@@ -532,7 +533,7 @@ int main(int argc, char ** argv) {
                 LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
                     n_past, n_left, n_ctx, params.n_keep, n_discard);
 
-                print_fields(*ctx);
+                //print_fields(*ctx);
                 llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1);
                 llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 
@@ -649,7 +650,7 @@ int main(int argc, char ** argv) {
             }
 
             const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
-            print_fields(id);
+            //print_fields(id);
             llama_sampling_accept(ctx_sampling, ctx, id, true);
 
             LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());
@@ -686,6 +687,8 @@ int main(int argc, char ** argv) {
                 const std::string token_str = llama_token_to_piece(ctx, id);
                 printf("TOKEN:%s\n", token_str.c_str());
 
+
+                //print_fields(id);
                 if (embd.size() > 1) {
                     input_tokens.push_back(id);
                 } else {
@@ -700,12 +703,20 @@ int main(int argc, char ** argv) {
                 console::set_display(console::reset);
             }
 
+            // just print the whole thing
+            const std::string last_output1 = output_ss.str();
+            printf("%s", last_output1.c_str());
+            const std::string last_output = process_output_plugin(last_output1);
+            printf("%s", last_output.c_str());
+
             // if not currently processing queued inputs;
             if ((int) embd_inp.size() <= n_consumed) {
                 // check for reverse prompt in the last n_prev tokens
                 if (!params.antiprompt.empty()) {
                     const int n_prev = 32;
-                    const std::string last_output = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+                    const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
+                    // now plug in the python:
+                    const std::string last_output = process_output_plugin(last_output1);
 
                     is_antiprompt = false;
                     // Check if each of the reverse prompts appears at the end of the output.
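Downstream of this hunk, last_output (now the plugin-rewritten text) is only used to check whether a reverse prompt appears at the end of the output. A simplified stand-in for that check, handy for testing the plugin output in isolation; the function name and the plain suffix comparison are illustrative, not llama.cpp's exact logic.

    #include <string>
    #include <vector>

    // Returns true when the (plugin-filtered) output ends with any reverse prompt.
    static bool ends_with_antiprompt(const std::string & output,
                                     const std::vector<std::string> & antiprompts) {
        for (const std::string & ap : antiprompts) {
            if (output.size() >= ap.size() &&
                output.compare(output.size() - ap.size(), ap.size(), ap) == 0) {
                return true;   // the model just emitted a reverse prompt; hand control back
            }
        }
        return false;
    }

    // e.g.  is_antiprompt = ends_with_antiprompt(process_output_plugin(last_output1), params.antiprompt);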
@@ -26,7 +26,7 @@ using namespace boost::python;
 #endif
 
 
-int call_python()
+std::string process_output_plugin(const std::string input)
 {
     try {
         PyImport_AppendInittab((char*)"mymodule", INIT_MODULE);
@@ -36,12 +36,18 @@ int call_python()
         object mymodule = import("mymodule");
 
         main_namespace["precreated_object"] = Base("created on C++ side");
+        main_namespace["llm_input"] = input;
         exec_file("embedding.py", main_namespace, main_namespace);
 
+        boost::python::object llm_output = main_namespace["llm_output"];
+        std::string message = boost::python::extract<std::string>(llm_output);
+
+        return message;
+
     } catch (error_already_set& e) {
         PyErr_PrintEx(0);
-        return 1;
+        return "";
     }
-    return 0;
+
 }
 
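One consequence of the error path above: if embedding.py throws, process_output_plugin() prints the traceback and returns an empty string, so the caller in main.cpp prints nothing and runs the reverse-prompt check against "". A possible hardening, not part of this commit, is to fall back to the unfiltered text; the wrapper name below is hypothetical.

    #include <string>

    std::string process_output_plugin(const std::string input);   // declared in plugin_python.hpp (assumed)

    // Hypothetical wrapper: prefer the plugin result, keep the raw text when Python fails.
    std::string filter_output(const std::string & text) {
        const std::string filtered = process_output_plugin(text);
        return filtered.empty() ? text : filtered;   // empty result signals a Python-side error
    }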