Merge remote-tracking branch 'origin/master' into jinja
commit cb72cf1fc3

215 changed files with 23423 additions and 18704 deletions
common/arg.cpp:

@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }
 
+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }
 
+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);
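The two hunks above pair up: set_excludes() records which examples an option is barred from, and is_exclude() queries that set, mirroring the existing set_examples()/in_example() pair. A minimal sketch of the declarations this implies in common/arg.h; the std::set member type and field name are assumptions inferred from the .find() calls, not shown in this diff:

    struct common_arg {
        std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
        std::set<enum llama_example> excludes = {};  // assumed new member
        // ...
        common_arg & set_examples(std::initializer_list<enum llama_example> examples);
        common_arg & set_excludes(std::initializer_list<enum llama_example> excludes); // new
        bool in_example(enum llama_example ex);
        bool is_exclude(enum llama_example ex);                                        // new
    };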
@@ -121,17 +130,26 @@ std::string common_arg::to_string() {
 
 static void common_params_handle_model_default(
         std::string & model,
-        std::string & model_url,
+        const std::string & model_url,
         std::string & hf_repo,
-        std::string & hf_file) {
+        std::string & hf_file,
+        const std::string & hf_token) {
     if (!hf_repo.empty()) {
         // short-hand to avoid specifying --hf-file -> default it to --model
         if (hf_file.empty()) {
             if (model.empty()) {
-                throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
+                auto auto_detected = common_get_hf_file(hf_repo, hf_token);
+                if (auto_detected.first.empty() || auto_detected.second.empty()) {
+                    exit(1); // built without CURL, error message already printed
+                }
+                hf_repo = auto_detected.first;
+                hf_file = auto_detected.second;
+            } else {
+                hf_file = model;
             }
-            hf_file = model;
-        } else if (model.empty()) {
+        }
+        // make sure model path is present (for caching purposes)
+        if (model.empty()) {
             // this is to avoid different repo having same file name, or same file name in different subdirs
             std::string filename = hf_repo + "_" + hf_file;
             // to make sure we don't have any slashes in the filename
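The rewritten branch auto-detects the file when only --hf-repo is given, instead of throwing. It relies on common_get_hf_file(), which is declared elsewhere in this change set; a sketch of the interface as inferred from this call site (parameter names and the return convention are assumptions):

    // resolve "<user>/<model>[:quant]" to a concrete (repo, GGUF file) pair,
    // querying the Hugging Face endpoint with the optional token;
    // assumed to return empty strings on failure (e.g. builds without CURL).
    std::pair<std::string, std::string> common_get_hf_file(
            const std::string & hf_repo_with_tag,
            const std::string & hf_token);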
@@ -281,8 +299,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
     }
 
     // TODO: refactor model params in a common struct
-    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file);
-    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file);
+    common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
+    common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
 
     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -420,7 +438,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
             ctx_arg.options.push_back(std::move(arg));
         }
     };
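An option now has to match the current example and not be excluded from it before it is registered. A hypothetical registration showing the interaction (the option name here is invented for illustration):

    // listed under COMMON, so in_example(ex) passes for every example,
    // but is_exclude(LLAMA_EXAMPLE_SERVER) makes the server skip it:
    add_opt(common_arg(
        {"--demo-only"}, "VALUE",  // hypothetical option
        "visible everywhere except llama-server",
        [](common_params & params, const std::string & value) { /* ... */ }
    ).set_excludes({LLAMA_EXAMPLE_SERVER}));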
@@ -649,7 +667,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +691,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.prompt.pop_back();
             }
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -700,7 +718,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-e", "--escape"},
         string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),
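These three hunks put the new mechanism to use: --prompt, plus the two options whose handlers the second and third hunks close (evidently -f/--file and --binary-file, both of which load params.prompt from a file), are all excluded from LLAMA_EXAMPLE_SERVER, presumably because the server takes prompts over its HTTP API rather than on the command line. Assuming that reading of the change, the observable effect would be:

    llama-cli    -m model.gguf -p "Hello"    # still accepted
    llama-server -m model.gguf -p "Hello"    # now rejected as an unknown argument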
@@ -1512,7 +1530,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0 });
+            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
@@ -1520,7 +1538,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale) });
+            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
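In both LoRA hunks the brace-initializer gains a third field, so the element type of params.lora_adapters evidently grew a pointer that stays null at parse time and is filled in when the adapter is loaded. A sketch of the assumed struct (field and type names are assumptions, not shown in this diff):

    struct common_adapter_lora_info {
        std::string path;
        float       scale;
        struct llama_adapter_lora * ptr;  // assumed new member, set at load time
    };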
@@ -1574,21 +1592,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_env("LLAMA_ARG_MODEL_URL"));
     add_opt(common_arg(
-        {"-hfr", "--hf-repo"}, "REPO",
-        "Hugging Face model repository (default: unused)",
+        {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
+        "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "example: unsloth/phi-4-GGUF:q4_k_m\n"
+        "(default: unused)",
         [](common_params & params, const std::string & value) {
             params.hf_repo = value;
         }
     ).set_env("LLAMA_ARG_HF_REPO"));
     add_opt(common_arg(
         {"-hff", "--hf-file"}, "FILE",
-        "Hugging Face model file (default: unused)",
+        "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
         [](common_params & params, const std::string & value) {
             params.hf_file = value;
         }
     ).set_env("LLAMA_ARG_HF_FILE"));
     add_opt(common_arg(
-        {"-hfrv", "--hf-repo-v"}, "REPO",
+        {"-hfv", "-hfrv", "--hf-repo-v"}, "<user>/<model>[:quant]",
         "Hugging Face model repository for the vocoder model (default: unused)",
         [](common_params & params, const std::string & value) {
             params.vocoder.hf_repo = value;
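The new -hf shorthand folds the quantization tag into the repo argument. Usage implied by the updated help text (the file name in the last line is illustrative):

    llama-cli -hf unsloth/phi-4-GGUF:q4_k_m    # explicit quant tag, case-insensitive
    llama-cli -hf unsloth/phi-4-GGUF           # defaults to Q4_K_M, else first file in the repo
    llama-cli -hf unsloth/phi-4-GGUF -hff phi-4-Q8_0.gguf   # --hf-file overrides the tag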