Merge 35b0bf0585 into c4fe84fb0d
commit d2c2630307
4 changed files with 25 additions and 4 deletions
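
In short: this change makes the end-of-text marker configurable. A new --eot_token option sets the string printed when generation ends (default "[end of text]"), a new --forceendtoken flag forces that marker to be printed even in interactive and instruct modes, and model loading now announces itself with "[model loading]" and reports progress as "[percentage] N%" lines instead of dots.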
@@ -199,6 +199,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.input_prefix = argv[i];
+        } else if (arg == "--forceendtoken") {
+            params.forceendtoken = true;
+        } else if (arg == "--eot_token") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.eot_token = argv[i];
         } else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             gpt_print_usage(argc, argv, default_params);
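
For illustration only (the binary name, model path, and token value below are placeholders, not part of this diff), the two new options would be passed like:

./main -m ./models/7B/ggml-model-q4_0.bin --forceendtoken --eot_token "<|endoftext|>"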
@@ -36,6 +36,7 @@ struct gpt_params {
 
     std::string lora_adapter = ""; // lora adapter path
     std::string lora_base = "";    // base model path for the lora adapter
+    std::string eot_token = "[end of text]"; // marker printed when generation ends
 
     bool memory_f16 = true;     // use f16 instead of f32 for memory kv
     bool random_prompt = false; // do not randomize prompt if none provided

@@ -52,6 +53,7 @@ struct gpt_params {
     bool use_mlock = false;      // use mlock to keep model in memory
     bool mem_test = false;       // compute maximum memory usage
     bool verbose_prompt = false; // print prompt tokens before generation
+    bool forceendtoken = true;   // force printing the end-of-text token after generation
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
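
A minimal standalone sketch of how the two new gpt_params fields are meant to interact; the struct and function names here are illustrative, not from the diff. Note that forceendtoken defaults to true, so passing --forceendtoken only reaffirms the default. It mirrors the end-of-text condition added in main() below:

// Sketch only: illustrative names, mirroring the condition added in main().
#include <cstdio>
#include <string>

struct params_sketch {
    std::string eot_token = "[end of text]"; // marker printed when generation ends
    bool forceendtoken    = true;            // print the marker even in instruct mode
};

void print_eot_marker(const params_sketch & params, bool instruct) {
    // The marker is skipped only in instruct mode with forceendtoken disabled.
    if (params.forceendtoken || !instruct) {
        std::fprintf(stderr, "%s\n", params.eot_token.c_str());
    }
}

int main() {
    params_sketch params;
    print_eot_marker(params, /*instruct=*/true); // still prints: forceendtoken is true
    return 0;
}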

@@ -245,7 +245,8 @@ int main(int argc, char ** argv) {
                " - Press Ctrl+C to interject at any time.\n"
 #endif
                " - Press Return to return control to LLaMa.\n"
-               " - If you want to submit another line, end your input in '\\'.\n\n");
+               " - If you want to submit another line, end your input in '\\'.\n"
+               "[model ready]\n");
         is_interacting = params.interactive_start;
     }
 
@@ -388,6 +389,9 @@ int main(int argc, char ** argv) {
                     is_antiprompt = true;
                     set_console_color(con_st, CONSOLE_COLOR_USER_INPUT);
                     fflush(stdout);
+                    if (params.forceendtoken) {
+                        fprintf(stderr, "%s\n", params.eot_token.c_str());
+                    }
                     break;
                 }
             }

@@ -470,10 +474,13 @@ int main(int argc, char ** argv) {
 
         // end of text token
         if (!embd.empty() && embd.back() == llama_token_eos()) {
+            if (params.forceendtoken || !params.instruct) {
+                fprintf(stderr, "%s\n", params.eot_token.c_str());
+            }
             if (params.instruct) {
                 is_interacting = true;
-            } else {
-                fprintf(stderr, " [end of text]\n");
+            }
+            else {
                 break;
             }
         }

@@ -481,6 +488,9 @@ int main(int argc, char ** argv) {
         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
         if (params.interactive && n_remain <= 0 && params.n_predict != -1) {
             n_remain = params.n_predict;
+            if (params.forceendtoken) {
+                fprintf(stderr, "%s\n", params.eot_token.c_str());
+            }
             is_interacting = true;
         }
     }

@@ -938,6 +938,7 @@ static void llama_model_load_internal(
 
         fprintf(stderr, "%s: mem required = %7.2f MB (+ %7.2f MB per state)\n", __func__,
                 mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
+        fprintf(stderr, "[model loading]\n");
     }
 
     // create the ggml context

@@ -1749,7 +1750,7 @@ struct llama_context * llama_init_from_file(
             unsigned percentage = (unsigned) (100 * progress);
             while (percentage > *cur_percentage_p) {
                 ++*cur_percentage_p;
-                fprintf(stderr, ".");
+                fprintf(stderr, "[percentage] %u%%\n", *cur_percentage_p);
                 fflush(stderr);
                 if (percentage >= 100) {
                     fprintf(stderr, "\n");
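
The loop above follows the usual incremental progress-callback pattern; a standalone sketch of the same logic (illustrative function name, not from the diff):

// Prints one "[percentage] N%" line per whole percentage point gained.
#include <cstdio>

void report_progress(float progress, unsigned * cur_percentage_p) {
    unsigned percentage = (unsigned) (100 * progress);
    while (percentage > *cur_percentage_p) {
        ++*cur_percentage_p;
        std::fprintf(stderr, "[percentage] %u%%\n", *cur_percentage_p);
        std::fflush(stderr);
    }
}

int main() {
    unsigned cur_percentage = 0;
    report_progress(0.02f, &cur_percentage); // prints 1% and 2%
    report_progress(1.00f, &cur_percentage); // prints 3% through 100%
    return 0;
}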