common : add -hfd option for the draft model

2025-01-20 21:46:58 +02:00 · 2025-01-20 21:46:58 +02:00 · 6ef22f0547
commit 6ef22f0547
parent aea8ddd516
2 changed files with 17 additions and 3 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@ -300,6 +300,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
    // TODO: refactor model params in a common struct
    common_params_handle_model_default(params.model,             params.model_url,             params.hf_repo,             params.hf_file,             params.hf_token);
    common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token);
    common_params_handle_model_default(params.vocoder.model,     params.vocoder.model_url,     params.vocoder.hf_repo,     params.vocoder.hf_file,     params.hf_token);
    if (params.escape) {
@ -1629,6 +1630,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.hf_repo = value;
        }
    ).set_env("LLAMA_ARG_HF_REPO"));
    // Draft-model counterpart of --hf-repo: the handler stores the value in
    // params.speculative.hf_repo instead of params.hf_repo.
    // The option gets its own environment variable. LLAMA_ARG_HF_REPO is already bound to
    // the main-model --hf-repo option, so reusing it here would make the two options
    // share a single variable.
    add_opt(common_arg(
        {"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
        "Same as --hf-repo, but for the draft model (default: unused)",
        [](common_params & params, const std::string & value) {
            params.speculative.hf_repo = value;
        }
    ).set_env("LLAMA_ARG_HFD_REPO"));
    add_opt(common_arg(
        {"-hff", "--hf-file"}, "FILE",
        "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
--- a/common/common.h
+++ b/common/common.h
@ -175,7 +175,11 @@ struct common_params_speculative {
    struct cpu_params cpuparams;
    struct cpu_params cpuparams_batch;
    std::string hf_repo = ""; // HF repo                                                     // NOLINT
    std::string hf_file = ""; // HF file                                                     // NOLINT
    std::string model = "";     // draft model for speculative decoding                      // NOLINT
    std::string model_url = ""; // model url to download                                     // NOLINT
 };
 struct common_params_vocoder {
@ -508,12 +512,14 @@ struct llama_model * common_load_model_from_url(
    const std::string & local_path,
    const std::string & hf_token,
    const struct llama_model_params & params);
 // Hugging Face variant of the model loader; returns a `llama_model` pointer.
 // Parameters: the HF repo, the remote file path, a local path, the HF token and the
 // llama model params.
 // NOTE(review): presumably `remote_path` is fetched from `repo` into `local_path` before
 // loading, and the result is null on failure — confirm against the definition.
 struct llama_model * common_load_model_from_hf(
    const std::string & repo,
    const std::string & remote_path,
    const std::string & local_path,
    const std::string & hf_token,
    const struct llama_model_params & params);
 // Takes an HF repo string that may carry a tag, plus the HF token, and returns a pair
 // of strings.
 // NOTE(review): presumably the input has the "<user>/<model>[:quant]" form accepted by
 // --hf-repo and the returned pair is <repo, file> — confirm against the definition.
 std::pair<std::string, std::string> common_get_hf_file(
    const std::string & hf_repo_with_tag,
    const std::string & hf_token);