cli : auto activate conversation mode if chat template is available (#11214)
* cli : auto activate conversation mode if chat template is detected
* add warn on bad template
* update readme (writing with the help of chatgpt)
* update readme (2)
* do not activate -cnv for non-instruct models
This commit is contained in:
parent
39509fb082
commit
84a44815f7
4 changed files with 75 additions and 36 deletions
|
@@ -777,15 +777,19 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
|
||||
add_opt(common_arg(
|
||||
{"-cnv", "--conversation"},
|
||||
string_format(
|
||||
"run in conversation mode:\n"
|
||||
"- does not print special tokens and suffix/prefix\n"
|
||||
"- interactive mode is also enabled\n"
|
||||
"(default: %s)",
|
||||
params.conversation ? "true" : "false"
|
||||
),
|
||||
"run in conversation mode:\n"
|
||||
"- does not print special tokens and suffix/prefix\n"
|
||||
"- interactive mode is also enabled\n"
|
||||
"(default: auto enabled if chat template is available)",
|
||||
[](common_params & params) {
|
||||
params.conversation = true;
|
||||
params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||
add_opt(common_arg(
|
||||
{"-no-cnv", "--no-conversation"},
|
||||
"force disable conversation mode (default: false)",
|
||||
[](common_params & params) {
|
||||
params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||
add_opt(common_arg(
|
||||
|
|
|
@@ -103,6 +103,12 @@ enum dimre_method {
|
|||
DIMRE_METHOD_MEAN,
|
||||
};
|
||||
|
||||
// Tri-state selector for conversation mode, stored in common_params::conversation_mode.
// The field defaults to COMMON_CONVERSATION_MODE_AUTO; the -cnv / -no-cnv options override it.
enum common_conversation_mode {
|
||||
COMMON_CONVERSATION_MODE_DISABLED = 0, // set by -no-cnv / --no-conversation ("force disable conversation mode")
|
||||
COMMON_CONVERSATION_MODE_ENABLED = 1, // set by -cnv / --conversation
|
||||
COMMON_CONVERSATION_MODE_AUTO = 2, // default; per the -cnv help text, auto enabled if chat template is available
|
||||
};
|
||||
|
||||
// sampling parameters
|
||||
struct common_params_sampling {
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
||||
|
@@ -275,7 +281,6 @@ struct common_params {
|
|||
bool special = false; // enable special token output
|
||||
bool interactive = false; // interactive mode
|
||||
bool interactive_first = false; // wait for user input immediately
|
||||
bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
|
||||
bool prompt_cache_all = false; // save user input and generations to prompt cache
|
||||
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
|
||||
|
||||
|
@@ -301,6 +306,8 @@ struct common_params {
|
|||
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
||||
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
||||
|
||||
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
|
||||
|
||||
// multimodal models (see examples/llava)
|
||||
std::string mmproj = ""; // path to multimodal projector // NOLINT
|
||||
std::vector<std::string> image; // path to image file(s)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue