server : add more env vars, improve gen-docs
This commit is contained in:
parent
3d6bf6919f
commit
3d4c45064d
3 changed files with 89 additions and 54 deletions
|
@ -1102,7 +1102,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
|
else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
|
||||||
else { throw std::invalid_argument("invalid value"); }
|
else { throw std::invalid_argument("invalid value"); }
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
|
).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--attention"}, "{causal,non,causal}",
|
{"--attention"}, "{causal,non,causal}",
|
||||||
"attention type for embeddings, use model default if unspecified",
|
"attention type for embeddings, use model default if unspecified",
|
||||||
|
@ -1121,77 +1121,77 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
|
else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
|
||||||
else { throw std::invalid_argument("invalid value"); }
|
else { throw std::invalid_argument("invalid value"); }
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_ROPE_SCALING_TYPE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--rope-scale"}, "N",
|
{"--rope-scale"}, "N",
|
||||||
"RoPE context scaling factor, expands context by a factor of N",
|
"RoPE context scaling factor, expands context by a factor of N",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.rope_freq_scale = 1.0f / std::stof(value);
|
params.rope_freq_scale = 1.0f / std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_ROPE_SCALE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--rope-freq-base"}, "N",
|
{"--rope-freq-base"}, "N",
|
||||||
"RoPE base frequency, used by NTK-aware scaling (default: loaded from model)",
|
"RoPE base frequency, used by NTK-aware scaling (default: loaded from model)",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.rope_freq_base = std::stof(value);
|
params.rope_freq_base = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_ROPE_FREQ_BASE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--rope-freq-scale"}, "N",
|
{"--rope-freq-scale"}, "N",
|
||||||
"RoPE frequency scaling factor, expands context by a factor of 1/N",
|
"RoPE frequency scaling factor, expands context by a factor of 1/N",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.rope_freq_scale = std::stof(value);
|
params.rope_freq_scale = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_ROPE_FREQ_SCALE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--yarn-orig-ctx"}, "N",
|
{"--yarn-orig-ctx"}, "N",
|
||||||
format("YaRN: original context size of model (default: %d = model training context size)", params.yarn_orig_ctx),
|
format("YaRN: original context size of model (default: %d = model training context size)", params.yarn_orig_ctx),
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.yarn_orig_ctx = value;
|
params.yarn_orig_ctx = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_YARN_ORIG_CTX"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--yarn-ext-factor"}, "N",
|
{"--yarn-ext-factor"}, "N",
|
||||||
format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
|
format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.yarn_ext_factor = std::stof(value);
|
params.yarn_ext_factor = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_YARN_EXT_FACTOR"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--yarn-attn-factor"}, "N",
|
{"--yarn-attn-factor"}, "N",
|
||||||
format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor),
|
format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor),
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.yarn_attn_factor = std::stof(value);
|
params.yarn_attn_factor = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_YARN_ATTN_FACTOR"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--yarn-beta-slow"}, "N",
|
{"--yarn-beta-slow"}, "N",
|
||||||
format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow),
|
format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow),
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.yarn_beta_slow = std::stof(value);
|
params.yarn_beta_slow = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_YARN_BETA_SLOW"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--yarn-beta-fast"}, "N",
|
{"--yarn-beta-fast"}, "N",
|
||||||
format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast),
|
format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast),
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.yarn_beta_fast = std::stof(value);
|
params.yarn_beta_fast = std::stof(value);
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_YARN_BETA_FAST"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-gan", "--grp-attn-n"}, "N",
|
{"-gan", "--grp-attn-n"}, "N",
|
||||||
format("group-attention factor (default: %d)", params.grp_attn_n),
|
format("group-attention factor (default: %d)", params.grp_attn_n),
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.grp_attn_n = value;
|
params.grp_attn_n = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_GRP_ATTN_N"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-gaw", "--grp-attn-w"}, "N",
|
{"-gaw", "--grp-attn-w"}, "N",
|
||||||
format("group-attention width (default: %.1f)", (double)params.grp_attn_w),
|
format("group-attention width (default: %.1f)", (double)params.grp_attn_w),
|
||||||
[](gpt_params & params, int value) {
|
[](gpt_params & params, int value) {
|
||||||
params.grp_attn_w = value;
|
params.grp_attn_w = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_GRP_ATTN_W"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-dkvc", "--dump-kv-cache"},
|
{"-dkvc", "--dump-kv-cache"},
|
||||||
"verbose print of the KV cache",
|
"verbose print of the KV cache",
|
||||||
|
@ -1205,7 +1205,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.no_kv_offload = true;
|
params.no_kv_offload = true;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_NO_KV_OFFLOAD"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ctk", "--cache-type-k"}, "TYPE",
|
{"-ctk", "--cache-type-k"}, "TYPE",
|
||||||
format("KV cache data type for K (default: %s)", params.cache_type_k.c_str()),
|
format("KV cache data type for K (default: %s)", params.cache_type_k.c_str()),
|
||||||
|
@ -1213,7 +1213,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
// TODO: get the type right here
|
// TODO: get the type right here
|
||||||
params.cache_type_k = value;
|
params.cache_type_k = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_CACHE_TYPE_K"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ctv", "--cache-type-v"}, "TYPE",
|
{"-ctv", "--cache-type-v"}, "TYPE",
|
||||||
format("KV cache data type for V (default: %s)", params.cache_type_v.c_str()),
|
format("KV cache data type for V (default: %s)", params.cache_type_v.c_str()),
|
||||||
|
@ -1221,7 +1221,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
// TODO: get the type right here
|
// TODO: get the type right here
|
||||||
params.cache_type_v = value;
|
params.cache_type_v = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_CACHE_TYPE_V"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--perplexity", "--all-logits"},
|
{"--perplexity", "--all-logits"},
|
||||||
format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
|
format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"),
|
||||||
|
@ -1355,7 +1355,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.rpc_servers = value;
|
params.rpc_servers = value;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_RPC"));
|
||||||
#endif
|
#endif
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--mlock"},
|
{"--mlock"},
|
||||||
|
@ -1363,14 +1363,14 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.use_mlock = true;
|
params.use_mlock = true;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_MLOCK"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--no-mmap"},
|
{"--no-mmap"},
|
||||||
"do not memory-map model (slower load but may reduce pageouts if not using mlock)",
|
"do not memory-map model (slower load but may reduce pageouts if not using mlock)",
|
||||||
[](gpt_params & params) {
|
[](gpt_params & params) {
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_NO_MMAP"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--numa"}, "TYPE",
|
{"--numa"}, "TYPE",
|
||||||
"attempt optimizations that help on some NUMA systems\n"
|
"attempt optimizations that help on some NUMA systems\n"
|
||||||
|
@ -1385,7 +1385,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
|
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
|
||||||
else { throw std::invalid_argument("invalid value"); }
|
else { throw std::invalid_argument("invalid value"); }
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_NUMA"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
|
{"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
|
||||||
"number of layers to store in VRAM",
|
"number of layers to store in VRAM",
|
||||||
|
@ -1433,7 +1433,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_SPLIT_MODE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-ts", "--tensor-split"}, "N0,N1,N2,...",
|
{"-ts", "--tensor-split"}, "N0,N1,N2,...",
|
||||||
"fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1",
|
"fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1",
|
||||||
|
@ -1460,7 +1460,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_TENSOR_SPLIT"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-mg", "--main-gpu"}, "INDEX",
|
{"-mg", "--main-gpu"}, "INDEX",
|
||||||
format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu),
|
format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu),
|
||||||
|
@ -1470,7 +1470,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
));
|
).set_env("LLAMA_ARG_MAIN_GPU"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--check-tensors"},
|
{"--check-tensors"},
|
||||||
format("check model tensor data for invalid values (default: %s)", params.check_tensors ? "true" : "false"),
|
format("check model tensor data for invalid values (default: %s)", params.check_tensors ? "true" : "false"),
|
||||||
|
@ -1533,7 +1533,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.model_alias = value;
|
params.model_alias = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALIAS"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-m", "--model"}, "FNAME",
|
{"-m", "--model"}, "FNAME",
|
||||||
ex == LLAMA_EXAMPLE_EXPORT_LORA
|
ex == LLAMA_EXAMPLE_EXPORT_LORA
|
||||||
|
@ -1741,7 +1741,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.public_path = value;
|
params.public_path = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--embedding", "--embeddings"},
|
{"--embedding", "--embeddings"},
|
||||||
format("restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled"),
|
format("restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled"),
|
||||||
|
@ -1779,14 +1779,14 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.ssl_file_key = value;
|
params.ssl_file_key = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_KEY_FILE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--ssl-cert-file"}, "FNAME",
|
{"--ssl-cert-file"}, "FNAME",
|
||||||
"path to file a PEM-encoded SSL certificate",
|
"path to file a PEM-encoded SSL certificate",
|
||||||
[](gpt_params & params, const std::string & value) {
|
[](gpt_params & params, const std::string & value) {
|
||||||
params.ssl_file_cert = value;
|
params.ssl_file_cert = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-to", "--timeout"}, "N",
|
{"-to", "--timeout"}, "N",
|
||||||
format("server read/write timeout in seconds (default: %d)", params.timeout_read),
|
format("server read/write timeout in seconds (default: %d)", params.timeout_read),
|
||||||
|
@ -1794,7 +1794,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
params.timeout_read = value;
|
params.timeout_read = value;
|
||||||
params.timeout_write = value;
|
params.timeout_write = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TIMEOUT"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--threads-http"}, "N",
|
{"--threads-http"}, "N",
|
||||||
format("number of threads used to process HTTP requests (default: %d)", params.n_threads_http),
|
format("number of threads used to process HTTP requests (default: %d)", params.n_threads_http),
|
||||||
|
|
|
@ -6,42 +6,73 @@
|
||||||
|
|
||||||
// Export usage message (-h) to markdown format
|
// Export usage message (-h) to markdown format
|
||||||
|
|
||||||
|
// Emit the two-line header of a markdown table (column titles followed by the
// separator row) to `file`. Rows are expected to be appended by the caller.
static void write_table_header(std::ofstream & file) {
    // adjacent literals are concatenated by the compiler, so this writes the
    // title row and the separator row in a single insertion
    file << "| Argument | Explanation |\n"
            "| -------- | ----------- |\n";
}
|
||||||
|
|
||||||
|
static void write_table_entry(std::ofstream & file, const llama_arg & opt) {
|
||||||
|
file << "| `";
|
||||||
|
// args
|
||||||
|
for (const auto & arg : opt.args) {
|
||||||
|
if (arg == opt.args.front()) {
|
||||||
|
file << arg;
|
||||||
|
if (opt.args.size() > 1) file << ", ";
|
||||||
|
} else {
|
||||||
|
file << arg << (arg != opt.args.back() ? ", " : "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// value hint
|
||||||
|
if (opt.value_hint) {
|
||||||
|
std::string md_value_hint(opt.value_hint);
|
||||||
|
string_replace_all(md_value_hint, "|", "\\|");
|
||||||
|
file << " " << md_value_hint;
|
||||||
|
}
|
||||||
|
if (opt.value_hint_2) {
|
||||||
|
std::string md_value_hint_2(opt.value_hint_2);
|
||||||
|
string_replace_all(md_value_hint_2, "|", "\\|");
|
||||||
|
file << " " << md_value_hint_2;
|
||||||
|
}
|
||||||
|
// help text
|
||||||
|
std::string md_help(opt.help);
|
||||||
|
string_replace_all(md_help, "\n", "<br/>");
|
||||||
|
string_replace_all(md_help, "|", "\\|");
|
||||||
|
file << "` | " << md_help << " |\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
static void write_table(std::ofstream & file, std::vector<llama_arg *> & opts) {
|
||||||
|
write_table_header(file);
|
||||||
|
for (const auto & opt : opts) {
|
||||||
|
write_table_entry(file, *opt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void export_md(std::string fname, llama_example ex) {
|
static void export_md(std::string fname, llama_example ex) {
|
||||||
std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
|
std::ofstream file(fname, std::ofstream::out | std::ofstream::trunc);
|
||||||
|
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
auto ctx_arg = gpt_params_parser_init(params, ex);
|
auto ctx_arg = gpt_params_parser_init(params, ex);
|
||||||
|
|
||||||
file << "| Argument | Explanation |\n";
|
std::vector<llama_arg *> common_options;
|
||||||
file << "| -------- | ----------- |\n";
|
std::vector<llama_arg *> sparam_options;
|
||||||
|
std::vector<llama_arg *> specific_options;
|
||||||
for (auto & opt : ctx_arg.options) {
|
for (auto & opt : ctx_arg.options) {
|
||||||
file << "| `";
|
// in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
|
||||||
// args
|
if (opt.is_sparam) {
|
||||||
for (const auto & arg : opt.args) {
|
sparam_options.push_back(&opt);
|
||||||
if (arg == opt.args.front()) {
|
} else if (opt.in_example(ctx_arg.ex)) {
|
||||||
file << arg;
|
specific_options.push_back(&opt);
|
||||||
if (opt.args.size() > 1) file << ", ";
|
} else {
|
||||||
} else {
|
common_options.push_back(&opt);
|
||||||
file << arg << (arg != opt.args.back() ? ", " : "");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// value hint
|
|
||||||
if (opt.value_hint) {
|
|
||||||
std::string md_value_hint(opt.value_hint);
|
|
||||||
string_replace_all(md_value_hint, "|", "\\|");
|
|
||||||
file << " " << md_value_hint;
|
|
||||||
}
|
|
||||||
if (opt.value_hint_2) {
|
|
||||||
std::string md_value_hint_2(opt.value_hint_2);
|
|
||||||
string_replace_all(md_value_hint_2, "|", "\\|");
|
|
||||||
file << " " << md_value_hint_2;
|
|
||||||
}
|
|
||||||
// help text
|
|
||||||
std::string md_help(opt.help);
|
|
||||||
string_replace_all(md_help, "\n", "<br/>");
|
|
||||||
string_replace_all(md_help, "|", "\\|");
|
|
||||||
file << "` | " << md_help << " |\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
file << "**Common params**\n\n";
|
||||||
|
write_table(file, common_options);
|
||||||
|
file << "\n\n**Sampling params**\n\n";
|
||||||
|
write_table(file, sparam_options);
|
||||||
|
file << "\n\n**Example-specific params**\n\n";
|
||||||
|
write_table(file, specific_options);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int, char **) {
|
int main(int, char **) {
|
||||||
|
|
|
@ -2356,6 +2356,10 @@ int main(int argc, char ** argv) {
|
||||||
svr.reset(new httplib::Server());
|
svr.reset(new httplib::Server());
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
if (params.ssl_file_key != "" && params.ssl_file_cert != "") {
|
||||||
|
LOG_ERR("Server is built without SSL support\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
svr.reset(new httplib::Server());
|
svr.reset(new httplib::Server());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue