common : bring back missing args
This commit is contained in:
parent
19f4a7b296
commit
056822ec4f
3 changed files with 76 additions and 32 deletions
|
@ -790,8 +790,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-C", "--cpu-mask"}, "M",
|
{"-C", "--cpu-mask"}, "M",
|
||||||
"CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")",
|
"CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & mask) {
|
||||||
std::string mask = value;
|
|
||||||
params.cpuparams.mask_valid = true;
|
params.cpuparams.mask_valid = true;
|
||||||
if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) {
|
if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) {
|
||||||
throw std::invalid_argument("invalid cpumask");
|
throw std::invalid_argument("invalid cpumask");
|
||||||
|
@ -801,8 +800,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Cr", "--cpu-range"}, "lo-hi",
|
{"-Cr", "--cpu-range"}, "lo-hi",
|
||||||
"range of CPUs for affinity. Complements --cpu-mask",
|
"range of CPUs for affinity. Complements --cpu-mask",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & range) {
|
||||||
std::string range = value;
|
|
||||||
params.cpuparams.mask_valid = true;
|
params.cpuparams.mask_valid = true;
|
||||||
if (!parse_cpu_range(range, params.cpuparams.cpumask)) {
|
if (!parse_cpu_range(range, params.cpuparams.cpumask)) {
|
||||||
throw std::invalid_argument("invalid range");
|
throw std::invalid_argument("invalid range");
|
||||||
|
@ -816,6 +814,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.cpuparams.strict_cpu = std::stoul(value);
|
params.cpuparams.strict_cpu = std::stoul(value);
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--prio"}, "N",
|
||||||
|
format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority),
|
||||||
|
[](gpt_params & params, int prio) {
|
||||||
|
if (prio < 0 || prio > 3) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.cpuparams.priority = (enum ggml_sched_priority) prio;
|
||||||
|
}
|
||||||
|
));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--poll"}, "<0...100>",
|
{"--poll"}, "<0...100>",
|
||||||
format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
|
format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
|
||||||
|
@ -826,8 +834,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Cb", "--cpu-mask-batch"}, "M",
|
{"-Cb", "--cpu-mask-batch"}, "M",
|
||||||
"CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
|
"CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & mask) {
|
||||||
std::string mask = value;
|
|
||||||
params.cpuparams_batch.mask_valid = true;
|
params.cpuparams_batch.mask_valid = true;
|
||||||
if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) {
|
if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) {
|
||||||
throw std::invalid_argument("invalid cpumask");
|
throw std::invalid_argument("invalid cpumask");
|
||||||
|
@ -837,8 +844,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Crb", "--cpu-range-batch"}, "lo-hi",
|
{"-Crb", "--cpu-range-batch"}, "lo-hi",
|
||||||
"ranges of CPUs for affinity. Complements --cpu-mask-batch",
|
"ranges of CPUs for affinity. Complements --cpu-mask-batch",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & range) {
|
||||||
std::string range = value;
|
|
||||||
params.cpuparams_batch.mask_valid = true;
|
params.cpuparams_batch.mask_valid = true;
|
||||||
if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) {
|
if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) {
|
||||||
throw std::invalid_argument("invalid range");
|
throw std::invalid_argument("invalid range");
|
||||||
|
@ -852,6 +858,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.cpuparams_batch.strict_cpu = value;
|
params.cpuparams_batch.strict_cpu = value;
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--prio-batch"}, "N",
|
||||||
|
// Show the default of the field this option actually sets (cpuparams_batch.priority).
format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority),
|
||||||
|
[](gpt_params & params, int prio) {
|
||||||
|
if (prio < 0 || prio > 3) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.cpuparams_batch.priority = (enum ggml_sched_priority) prio;
|
||||||
|
}
|
||||||
|
));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--poll-batch"}, "<0|1>",
|
{"--poll-batch"}, "<0|1>",
|
||||||
"use polling to wait for work (default: same as --poll)",
|
"use polling to wait for work (default: same as --poll)",
|
||||||
|
@ -862,8 +878,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Cd", "--cpu-mask-draft"}, "M",
|
{"-Cd", "--cpu-mask-draft"}, "M",
|
||||||
"Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
|
"Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & mask) {
|
||||||
std::string mask = value;
|
|
||||||
params.draft_cpuparams.mask_valid = true;
|
params.draft_cpuparams.mask_valid = true;
|
||||||
if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) {
|
if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) {
|
||||||
throw std::invalid_argument("invalid cpumask");
|
throw std::invalid_argument("invalid cpumask");
|
||||||
|
@ -873,8 +888,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Crd", "--cpu-range-draft"}, "lo-hi",
|
{"-Crd", "--cpu-range-draft"}, "lo-hi",
|
||||||
"Ranges of CPUs for affinity. Complements --cpu-mask-draft",
|
"Ranges of CPUs for affinity. Complements --cpu-mask-draft",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & range) {
|
||||||
std::string range = value;
|
|
||||||
params.draft_cpuparams.mask_valid = true;
|
params.draft_cpuparams.mask_valid = true;
|
||||||
if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) {
|
if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) {
|
||||||
throw std::invalid_argument("invalid range");
|
throw std::invalid_argument("invalid range");
|
||||||
|
@ -888,6 +902,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.draft_cpuparams.strict_cpu = value;
|
params.draft_cpuparams.strict_cpu = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--prio-draft"}, "N",
|
||||||
|
// Show the default of the field this option actually sets (draft_cpuparams.priority).
format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams.priority),
|
||||||
|
[](gpt_params & params, int prio) {
|
||||||
|
if (prio < 0 || prio > 3) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.draft_cpuparams.priority = (enum ggml_sched_priority) prio;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--poll-draft"}, "<0|1>",
|
{"--poll-draft"}, "<0|1>",
|
||||||
"Use polling to wait for draft model work (default: same as --poll])",
|
"Use polling to wait for draft model work (default: same as --poll])",
|
||||||
|
@ -895,11 +919,20 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.draft_cpuparams.poll = value;
|
params.draft_cpuparams.poll = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"-Cbd", "--cpu-mask-batch-draft"}, "M",
|
||||||
|
"Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)",
|
||||||
|
[](gpt_params & params, const std::string & mask) {
|
||||||
|
params.draft_cpuparams_batch.mask_valid = true;
|
||||||
|
if (!parse_cpu_mask(mask, params.draft_cpuparams_batch.cpumask)) {
|
||||||
|
throw std::invalid_argument("invalid cpumask");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-Crbd", "--cpu-range-batch-draft"}, "lo-hi",
|
{"-Crbd", "--cpu-range-batch-draft"}, "lo-hi",
|
||||||
"Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)",
|
"Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch)",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & range) {
|
||||||
std::string range = value;
|
|
||||||
params.draft_cpuparams_batch.mask_valid = true;
|
params.draft_cpuparams_batch.mask_valid = true;
|
||||||
if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) {
|
if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) {
|
||||||
throw std::invalid_argument("invalid cpumask");
|
throw std::invalid_argument("invalid cpumask");
|
||||||
|
@ -913,6 +946,16 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.draft_cpuparams_batch.strict_cpu = value;
|
params.draft_cpuparams_batch.strict_cpu = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--prio-batch-draft"}, "N",
|
||||||
|
// Show the default of the field this option actually sets (draft_cpuparams_batch.priority).
format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams_batch.priority),
|
||||||
|
[](gpt_params & params, int prio) {
|
||||||
|
if (prio < 0 || prio > 3) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) prio;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--poll-batch-draft"}, "<0|1>",
|
{"--poll-batch-draft"}, "<0|1>",
|
||||||
"Use polling to wait for draft model work (default: --poll-draft)",
|
"Use polling to wait for draft model work (default: --poll-draft)",
|
||||||
|
@ -1124,21 +1167,21 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.interactive = true;
|
params.interactive = true;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-if", "--interactive-first"},
|
{"-if", "--interactive-first"},
|
||||||
format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"),
|
format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"),
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.interactive_first = true;
|
params.interactive_first = true;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-mli", "--multiline-input"},
|
{"-mli", "--multiline-input"},
|
||||||
"allows you to write or paste multiple lines without ending each in '\\'",
|
"allows you to write or paste multiple lines without ending each in '\\'",
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.multiline_input = true;
|
params.multiline_input = true;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--in-prefix-bos"},
|
{"--in-prefix-bos"},
|
||||||
"prefix BOS to user inputs, preceding the `--in-prefix` string",
|
"prefix BOS to user inputs, preceding the `--in-prefix` string",
|
||||||
|
@ -1146,7 +1189,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.input_prefix_bos = true;
|
params.input_prefix_bos = true;
|
||||||
params.enable_chat_template = false;
|
params.enable_chat_template = false;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--in-prefix"}, "STRING",
|
{"--in-prefix"}, "STRING",
|
||||||
"string to prefix user inputs with (default: empty)",
|
"string to prefix user inputs with (default: empty)",
|
||||||
|
@ -1154,7 +1197,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.input_prefix = value;
|
params.input_prefix = value;
|
||||||
params.enable_chat_template = false;
|
params.enable_chat_template = false;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--in-suffix"}, "STRING",
|
{"--in-suffix"}, "STRING",
|
||||||
"string to suffix after user inputs with (default: empty)",
|
"string to suffix after user inputs with (default: empty)",
|
||||||
|
@ -1162,7 +1205,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.input_suffix = value;
|
params.input_suffix = value;
|
||||||
params.enable_chat_template = false;
|
params.enable_chat_template = false;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_INFILL}));
|
).set_examples({LLAMA_EXAMPLE_MAIN}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--no-warmup"},
|
{"--no-warmup"},
|
||||||
"skip warming up the model with an empty run",
|
"skip warming up the model with an empty run",
|
||||||
|
@ -1499,7 +1542,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--all-logits"},
|
{"--perplexity", "--all-logits"},
|
||||||
format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
|
format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.logits_all = true;
|
params.logits_all = true;
|
||||||
|
@ -1554,6 +1597,13 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
params.kl_divergence = true;
|
params.kl_divergence = true;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
|
).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--save-all-logits", "--kl-divergence-base"}, "FNAME",
|
||||||
|
"set logits file",
|
||||||
|
[](gpt_params & params, const std::string & value) {
|
||||||
|
params.logits_file = value;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--ppl-stride"}, "N",
|
{"--ppl-stride"}, "N",
|
||||||
format("stride for perplexity calculation (default: %d)", params.ppl_stride),
|
format("stride for perplexity calculation (default: %d)", params.ppl_stride),
|
||||||
|
@ -1656,7 +1706,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ngl", "--gpu-layers"}, "N",
|
{"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
|
||||||
"number of layers to store in VRAM",
|
"number of layers to store in VRAM",
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.n_gpu_layers = value;
|
params.n_gpu_layers = value;
|
||||||
|
@ -1667,7 +1717,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_N_GPU_LAYERS"));
|
).set_env("LLAMA_ARG_N_GPU_LAYERS"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ngld", "--gpu-layers-draft"}, "N",
|
{"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N",
|
||||||
"number of layers to store in VRAM for the draft model",
|
"number of layers to store in VRAM for the draft model",
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.n_gpu_layers_draft = value;
|
params.n_gpu_layers_draft = value;
|
||||||
|
@ -1890,7 +1940,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_PASSKEY}));
|
).set_examples({LLAMA_EXAMPLE_PASSKEY}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-o", "--output"}, "FNAME",
|
{"-o", "--output", "--output-file"}, "FNAME",
|
||||||
format("output file (default: '%s')",
|
format("output file (default: '%s')",
|
||||||
ex == LLAMA_EXAMPLE_EXPORT_LORA
|
ex == LLAMA_EXAMPLE_EXPORT_LORA
|
||||||
? params.lora_outfile.c_str()
|
? params.lora_outfile.c_str()
|
||||||
|
@ -1932,7 +1982,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
|
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--chunk"}, "N",
|
{"--chunk", "--from-chunk"}, "N",
|
||||||
format("start processing the input from chunk N (default: %d)", params.i_chunk),
|
format("start processing the input from chunk N (default: %d)", params.i_chunk),
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.i_chunk = value;
|
params.i_chunk = value;
|
||||||
|
@ -2057,7 +2107,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--timeout"}, "N",
|
{"-to", "--timeout"}, "N",
|
||||||
format("server read/write timeout in seconds (default: %d)", params.timeout_read),
|
format("server read/write timeout in seconds (default: %d)", params.timeout_read),
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.timeout_read = value;
|
params.timeout_read = value;
|
||||||
|
|
|
@ -211,7 +211,6 @@ struct gpt_params {
|
||||||
bool use_mlock = false; // use mlock to keep model in memory
|
bool use_mlock = false; // use mlock to keep model in memory
|
||||||
bool verbose_prompt = false; // print prompt tokens before generation
|
bool verbose_prompt = false; // print prompt tokens before generation
|
||||||
bool display_prompt = true; // print prompt before generation
|
bool display_prompt = true; // print prompt before generation
|
||||||
bool infill = false; // use infill mode
|
|
||||||
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
||||||
bool no_kv_offload = false; // disable KV offloading
|
bool no_kv_offload = false; // disable KV offloading
|
||||||
bool warmup = true; // warmup run
|
bool warmup = true; // warmup run
|
||||||
|
|
|
@ -306,11 +306,6 @@ int main(int argc, char ** argv) {
|
||||||
LOG_TEE("\n\n");
|
LOG_TEE("\n\n");
|
||||||
|
|
||||||
LOG_TEE("\n##### Infill mode #####\n\n");
|
LOG_TEE("\n##### Infill mode #####\n\n");
|
||||||
if (params.infill) {
|
|
||||||
printf("\n************\n");
|
|
||||||
printf("no need to specify '--infill', always running infill\n");
|
|
||||||
printf("************\n\n");
|
|
||||||
}
|
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
const char *control_message;
|
const char *control_message;
|
||||||
if (params.multiline_input) {
|
if (params.multiline_input) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue