common : add -hfd option for the draft model
This commit is contained in:
parent
aea8ddd516
commit
6ef22f0547
2 changed files with 17 additions and 3 deletions
|
@ -300,6 +300,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
|
||||||
|
|
||||||
// TODO: refactor model params in a common struct
|
// TODO: refactor model params in a common struct
|
||||||
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
|
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
|
||||||
|
common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token);
|
||||||
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
|
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
|
||||||
|
|
||||||
if (params.escape) {
|
if (params.escape) {
|
||||||
|
@ -1629,6 +1630,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
params.hf_repo = value;
|
params.hf_repo = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_HF_REPO"));
|
).set_env("LLAMA_ARG_HF_REPO"));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
|
||||||
|
"Same as --hf-repo, but for the draft model (default: unused)",
|
||||||
|
[](common_params & params, const std::string & value) {
|
||||||
|
params.speculative.hf_repo = value;
|
||||||
|
}
|
||||||
|
).set_env("LLAMA_ARG_HF_REPO"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"-hff", "--hf-file"}, "FILE",
|
{"-hff", "--hf-file"}, "FILE",
|
||||||
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
|
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
|
||||||
|
|
|
@ -175,7 +175,11 @@ struct common_params_speculative {
|
||||||
struct cpu_params cpuparams;
|
struct cpu_params cpuparams;
|
||||||
struct cpu_params cpuparams_batch;
|
struct cpu_params cpuparams_batch;
|
||||||
|
|
||||||
|
std::string hf_repo = ""; // HF repo // NOLINT
|
||||||
|
std::string hf_file = ""; // HF file // NOLINT
|
||||||
|
|
||||||
std::string model = ""; // draft model for speculative decoding // NOLINT
|
std::string model = ""; // draft model for speculative decoding // NOLINT
|
||||||
|
std::string model_url = ""; // model url to download // NOLINT
|
||||||
};
|
};
|
||||||
|
|
||||||
struct common_params_vocoder {
|
struct common_params_vocoder {
|
||||||
|
@ -508,12 +512,14 @@ struct llama_model * common_load_model_from_url(
|
||||||
const std::string & local_path,
|
const std::string & local_path,
|
||||||
const std::string & hf_token,
|
const std::string & hf_token,
|
||||||
const struct llama_model_params & params);
|
const struct llama_model_params & params);
|
||||||
|
|
||||||
struct llama_model * common_load_model_from_hf(
|
struct llama_model * common_load_model_from_hf(
|
||||||
const std::string & repo,
|
const std::string & repo,
|
||||||
const std::string & remote_path,
|
const std::string & remote_path,
|
||||||
const std::string & local_path,
|
const std::string & local_path,
|
||||||
const std::string & hf_token,
|
const std::string & hf_token,
|
||||||
const struct llama_model_params & params);
|
const struct llama_model_params & params);
|
||||||
|
|
||||||
std::pair<std::string, std::string> common_get_hf_file(
|
std::pair<std::string, std::string> common_get_hf_file(
|
||||||
const std::string & hf_repo_with_tag,
|
const std::string & hf_repo_with_tag,
|
||||||
const std::string & hf_token);
|
const std::string & hf_token);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue