more_responsive

This commit is contained in:
Jeffersoncgo 2023-04-19 08:01:35 -04:00
parent 8944a13296
commit 99eafe908f
4 changed files with 22 additions and 3 deletions

View file

@@ -199,6 +199,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.input_prefix = argv[i];
+        } else if (arg == "--forceendtoken") {
+            params.forceendtoken = true;
+        } else if (arg == "--eot_token") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.eot_token = argv[i];
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);

View file

@@ -36,6 +36,7 @@ struct gpt_params {
     std::string lora_adapter = ""; // lora adapter path
     std::string lora_base = ""; // base model path for the lora adapter
+    std::string eot_token = "[end of text]";
     bool memory_f16 = true; // use f16 instead of f32 for memory kv
     bool random_prompt = false; // do not randomize prompt if none provided
@@ -52,6 +53,7 @@ struct gpt_params {
     bool use_mlock = false; // use mlock to keep model in memory
     bool mem_test = false; // compute maximum memory usage
     bool verbose_prompt = false; // print prompt tokens before generation
+    bool forceendtoken = true; // Force show the "[end of text]" token after the generation
 };
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

View file

@@ -377,6 +377,9 @@ int main(int argc, char ** argv) {
                     is_antiprompt = true;
                     set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
                     fflush(stdout);
+                    if (params.forceendtoken) {
+                        fprintf(stderr, (params.eot_token + "\n").c_str());
+                    }
                     break;
                 }
             }
@@ -459,10 +462,13 @@ int main(int argc, char ** argv) {
         // end of text token
         if (!embd.empty() && embd.back() == llama_token_eos()) {
+            if (params.forceendtoken || !params.instruct) {
+                fprintf(stderr, (params.eot_token + "\n").c_str());
+            }
             if (params.instruct) {
                 is_interacting = true;
-            } else {
-                fprintf(stderr, " [end of text]\n");
+            }
+            else {
                 break;
             }
         }
@@ -470,6 +476,9 @@ int main(int argc, char ** argv) {
         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
         if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
             n_remain = params.n_predict;
+            if (params.forceendtoken) {
+                fprintf(stderr, (params.eot_token + "\n").c_str());
+            }
             is_interacting = true;
         }
     }

View file

@@ -1711,7 +1711,7 @@ struct llama_context * llama_init_from_file(
             unsigned percentage = (unsigned) (100 * progress);
             while (percentage > *cur_percentage_p) {
                 ++*cur_percentage_p;
-                fprintf(stderr, ".");
+                fprintf(stderr, "[porcentage] %u%%\n", *cur_percentage_p);
                 fflush(stderr);
                 if (percentage >= 100) {
                     fprintf(stderr, "\n");