common : remove chatml and instruct params
ggml-ci
parent ea4665e6ef
commit 4df81854ab
4 changed files with 7 additions and 75 deletions
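Removes the old instruction mode (`-ins`/`--instruct`, Alpaca-style) and ChatML mode (`-cml`/`--chatml`): the CLI flags, the `gpt_params::instruct` and `gpt_params::chatml` fields, the hard-coded prefix/suffix injection in the `main` example, and the corresponding entries in `run-with-preset.py`. Conversation mode (`-cnv`) remains.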
common/common.cpp
@@ -238,10 +238,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
         }
     }
 
-    if (params.prompt_cache_all &&
-            (params.interactive || params.interactive_first ||
-             params.instruct)) {
-
+    if (params.prompt_cache_all && (params.interactive || params.interactive_first)) {
        throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
    }
 
@@ -920,18 +917,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params) {
         params.interactive_first = true;
         return true;
     }
-    if (arg == "-ins" || arg == "--instruct") {
-        params.instruct = true;
-        return true;
-    }
     if (arg == "-cnv" || arg == "--conversation") {
         params.conversation = true;
         return true;
     }
-    if (arg == "-cml" || arg == "--chatml") {
-        params.chatml = true;
-        return true;
-    }
     if (arg == "--infill") {
         params.infill = true;
         return true;
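With the two branches above deleted, `gpt_params_find_arg` simply no longer matches `-ins`/`--instruct` and `-cml`/`--chatml`, so those arguments fall through to the caller's unknown-argument handling. A simplified stand-in illustrating that contract (a sketch, not the real parser, which also takes `argc`/`argv` and handles flags with values):

```cpp
// Sketch of the matcher's contract after this commit; not the real
// gpt_params_find_arg. Known flags set a field and return true; anything
// else returns false and is reported as unknown by the caller.
#include "common.h" // gpt_params (repo header, assumed available)
#include <string>

static bool find_arg_sketch(const std::string & arg, gpt_params & params) {
    if (arg == "-cnv" || arg == "--conversation") { params.conversation = true; return true; }
    if (arg == "--infill")                        { params.infill       = true; return true; }
    return false; // "-ins" and "-cml" now take this path
}
```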
@@ -1645,8 +1634,6 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
         "can be specified more than once for multiple prompts" });
     options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
     options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix) (default: %s)", params.conversation ? "true" : "false" });
-    options.push_back({ "main", "-ins, --instruct", "run in instruction mode (use with Alpaca models) (default: %s)", params.instruct ? "true" : "false" });
-    options.push_back({ "main", "-cml, --chatml", "run in chatml mode (use with ChatML-compatible models) (default: %s)", params.chatml ? "true" : "false" });
     options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
     options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
     options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
@@ -3200,7 +3187,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     yaml_dump_string_multiline(stream, "in_prefix", params.input_prefix.c_str());
     fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? "true" : "false");
     yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str());
-    fprintf(stream, "instruct: %s # default: false\n", params.instruct ? "true" : "false");
     fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false");
     fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false");
     fprintf(stream, "keep: %d # default: 0\n", params.n_keep);
common/common.h
@@ -148,7 +148,6 @@ struct gpt_params {
     bool interactive = false; // interactive mode
     bool interactive_first = false; // wait for user input immediately
     bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
-    bool chatml = false; // chatml mode (used for models trained on chatml syntax)
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
 
@@ -161,7 +160,6 @@ struct gpt_params {
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
     bool ignore_eos = false; // ignore generated EOS tokens
-    bool instruct = false; // instruction mode (used for Alpaca models)
     bool logits_all = false; // return logits for all tokens in the batch
     bool use_mmap = true; // use mmap for faster loads
     bool use_mlock = false; // use mlock to keep model in memory
examples/main/main.cpp
@@ -249,11 +249,8 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    if (params.interactive_first || params.instruct || params.chatml || !params.prompt.empty() || session_tokens.empty()) {
+    if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
         LOG("tokenize the prompt\n");
-        if (params.chatml) {
-            params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
-        }
         embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
     } else {
         LOG("use session tokens\n");
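The deleted branch shows the exact wrapping `--chatml` used to apply to the system prompt. A caller who relied on it can reproduce it before passing the prompt in via `-p`; a minimal sketch using the markers from the removed code (the helper name is made up here, not part of llama.cpp):

```cpp
// Hypothetical helper: wraps a system prompt in the same ChatML markers
// that the removed params.chatml branch applied.
#include <string>

static std::string wrap_chatml_system(const std::string & prompt) {
    return "<|im_start|>system\n" + prompt + "<|im_end|>";
}
```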
@@ -331,37 +328,13 @@ int main(int argc, char ** argv) {
     }
 
     // number of tokens to keep when resetting context
-    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct || params.chatml) {
+    if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size()) {
         params.n_keep = (int)embd_inp.size();
     } else {
         params.n_keep += add_bos; // always keep the BOS token
     }
 
-    // prefix & suffix for instruct mode
-    const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true, true);
-    const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false, true);
-
-    LOG("inp_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_pfx).c_str());
-    LOG("inp_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_sfx).c_str());
-
-    // chatml prefix & suffix
-    const auto cml_pfx = ::llama_tokenize(ctx, "\n<|im_start|>user\n", true, true);
-    const auto cml_sfx = ::llama_tokenize(ctx, "<|im_end|>\n<|im_start|>assistant\n", false, true);
-
-    LOG("cml_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_pfx).c_str());
-    LOG("cml_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_sfx).c_str());
-
-    // in instruct mode, we inject a prefix and a suffix to each input by the user
-    if (params.instruct) {
-        params.interactive_first = true;
-        params.antiprompt.emplace_back("### Instruction:\n\n");
-    }
-    // similar for chatml mode
-    else if (params.chatml) {
-        params.interactive_first = true;
-        params.antiprompt.emplace_back("<|im_start|>user\n");
-    }
-    else if (params.conversation) {
+    if (params.conversation) {
         params.interactive_first = true;
     }
 
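This hunk deletes the Alpaca and ChatML prefix/suffix plumbing outright. Roughly the same instruct loop can still be assembled by hand from fields that survive the commit; a sketch, not an official migration path (the strings are copied from the deleted code above, while the mapping onto `--in-prefix`/`--in-suffix`/`-r` is an assumption on my part, not part of this change):

```cpp
// Sketch: approximating the removed -ins behavior with surviving options.
#include "common.h" // gpt_params (repo header, assumed available)

static void emulate_instruct_mode(gpt_params & params) {
    params.interactive_first = true;                         // -ins used to force this
    params.antiprompt.emplace_back("### Instruction:\n\n");  // like -r "### Instruction:\n\n"
    params.input_prefix = "\n\n### Instruction:\n\n";        // like --in-prefix
    params.input_suffix = "\n\n### Response:\n\n";           // like --in-suffix
}
```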
@@ -822,15 +795,13 @@ int main(int argc, char ** argv) {
 
                 is_interacting = true;
                 printf("\n");
-            } else if (params.instruct || params.chatml) {
-                is_interacting = true;
             }
         }
 
         if (n_past > 0 && is_interacting) {
             LOG("waiting for user input\n");
 
-            if (params.conversation || params.instruct || params.chatml) {
+            if (params.conversation) {
                 printf("\n> ");
             }
 
@@ -873,18 +844,6 @@ int main(int argc, char ** argv) {
 
             const size_t original_size = embd_inp.size();
 
-            // instruct mode: insert instruction prefix
-            if (params.instruct && !is_antiprompt) {
-                LOG("inserting instruction prefix\n");
-                n_consumed = embd_inp.size();
-                embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
-            }
-            // chatml mode: insert user chat prefix
-            if (params.chatml && !is_antiprompt) {
-                LOG("inserting chatml prefix\n");
-                n_consumed = embd_inp.size();
-                embd_inp.insert(embd_inp.end(), cml_pfx.begin(), cml_pfx.end());
-            }
             if (params.escape) {
                 string_process_escapes(buffer);
             }
@@ -899,17 +858,6 @@ int main(int argc, char ** argv) {
             embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
             embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());
 
-            // instruct mode: insert response suffix
-            if (params.instruct) {
-                LOG("inserting instruction suffix\n");
-                embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
-            }
-            // chatml mode: insert assistant chat suffix
-            if (params.chatml) {
-                LOG("inserting chatml suffix\n");
-                embd_inp.insert(embd_inp.end(), cml_sfx.begin(), cml_sfx.end());
-            }
-
             for (size_t i = original_size; i < embd_inp.size(); ++i) {
                 const llama_token token = embd_inp[i];
                 output_tokens.push_back(token);
@@ -934,7 +882,7 @@ int main(int argc, char ** argv) {
         }
 
         // end of generation
-        if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.instruct || params.interactive || params.chatml)) {
+        if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.interactive)) {
             LOG_TEE(" [end of text]\n");
             break;
         }
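Net effect of the hunk above: an end-of-generation token now stops the run unless plain interactive mode is on; instruct and chatml no longer factor in. Restated as a standalone predicate for clarity (names taken from the hunk; this helper does not exist in the codebase):

```cpp
// The new stopping condition, isolated: stop on an EOG token whenever we
// are not running interactively.
#include "common.h" // gpt_params
#include "llama.h"  // llama_model, llama_token, llama_token_is_eog
#include <vector>

static bool is_end_of_generation(const llama_model * model,
                                 const std::vector<llama_token> & embd,
                                 const gpt_params & params) {
    return !embd.empty() && llama_token_is_eog(model, embd.back()) && !params.interactive;
}
```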
scripts/run-with-preset.py
@@ -13,7 +13,7 @@ logger = logging.getLogger("run-with-preset")
 CLI_ARGS_MAIN_PERPLEXITY = [
     "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
     "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
-    "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",
+    "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix",
     "interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",
     "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
     "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",