now it is letting the llm control the output

This commit is contained in:
mike dupont 2023-12-06 10:03:45 -05:00
parent 7972929a3b
commit 7eb27b3443
3 changed files with 21 additions and 10 deletions

View file

@ -1,3 +1,10 @@
# llama.cpp python hack
`./bin/main -m ~/.ollama/models/mistral --interactive -r STOP -p 'What is a tensor?'`
This calls `embedding.py` on the model's output; if the plugin's output ends in `STOP`, that output is used to start a new prompt for the LLM.
# llama.cpp # llama.cpp
![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)

View file

@ -1,2 +1,6 @@
print("hello llama.cpp" + llm_input) print("hello llama.cpp, got input:\n" + llm_input + "\n")
llm_output = "Is it because of your mother that " + llm_input + "?";
if len(llm_input) > 20:
llm_output = "Reinterpret with emojis " + llm_input + "?\nSTOP";
else:
llm_output = llm_input

View file

@ -497,7 +497,7 @@ int main(int argc, char ** argv) {
struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams); struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
//print_fields(*ctx_sampling); //print_fields(*ctx_sampling);
std::string last_output; // the output from python at any time
while ((n_remain != 0 && !is_antiprompt) || params.interactive) { while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
// predict // predict
if (!embd.empty()) { if (!embd.empty()) {
@ -706,7 +706,7 @@ int main(int argc, char ** argv) {
// just print the whole thing // just print the whole thing
const std::string last_output1 = output_ss.str(); const std::string last_output1 = output_ss.str();
printf("%s",last_output1.c_str()); printf("%s",last_output1.c_str());
const std::string last_output = process_output_plugin(last_output1); last_output = process_output_plugin(last_output1);
printf("%s",last_output.c_str()); printf("%s",last_output.c_str());
// if not currently processing queued inputs; // if not currently processing queued inputs;
@ -716,7 +716,7 @@ int main(int argc, char ** argv) {
const int n_prev = 32; const int n_prev = 32;
const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev); const std::string last_output1 = llama_sampling_prev_str(ctx_sampling, ctx, n_prev);
// now plugin the python : // now plugin the python :
const std::string last_output = process_output_plugin(last_output1); const std::string partial_output = process_output_plugin(last_output1);
is_antiprompt = false; is_antiprompt = false;
// Check if each of the reverse prompts appears at the end of the output. // Check if each of the reverse prompts appears at the end of the output.
@ -783,11 +783,11 @@ int main(int argc, char ** argv) {
console::set_display(console::user_input); console::set_display(console::user_input);
std::string line; std::string line;
bool another_line = true; //bool another_line = true;
do { //do {
another_line = console::readline(line, params.multiline_input); // another_line = console::readline(line, params.multiline_input);
buffer += line; buffer += last_output;
} while (another_line); //} while (another_line);
// done taking input, reset color // done taking input, reset color
console::set_display(console::reset); console::set_display(console::reset);