From c0df192838f51507e06b7293030b43232cd2670f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 18 Dec 2024 19:22:56 +0200 Subject: [PATCH] common : support HF download for vocoder --- common/arg.cpp | 50 +++++++++++++++++++++++++++++++------------- common/common.cpp | 7 ++++--- common/common.h | 6 +++++- examples/tts/tts.cpp | 7 ++++++- 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 93c15ecdc..e5ddd8318 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -119,29 +119,33 @@ std::string common_arg::to_string() { // utils // -static void common_params_handle_model_default(common_params & params) { - if (!params.hf_repo.empty()) { +static void common_params_handle_model_default( + std::string & model, + std::string & model_url, + std::string & hf_repo, + std::string & hf_file) { + if (!hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model - if (params.hf_file.empty()) { - if (params.model.empty()) { + if (hf_file.empty()) { + if (model.empty()) { throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n"); } - params.hf_file = params.model; - } else if (params.model.empty()) { + hf_file = model; + } else if (model.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = params.hf_repo + "_" + params.hf_file; + std::string filename = hf_repo + "_" + hf_file; // to make sure we don't have any slashes in the filename string_replace_all(filename, "/", "_"); - params.model = fs_get_cache_file(filename); + model = fs_get_cache_file(filename); } - } else if (!params.model_url.empty()) { - if (params.model.empty()) { - auto f = string_split(params.model_url, '#').front(); + } else if (!model_url.empty()) { + if (model.empty()) { + auto f = string_split(model_url, '#').front(); f = string_split(f, '?').front(); - params.model = fs_get_cache_file(string_split(f, '/').back()); + model = fs_get_cache_file(string_split(f, '/').back()); } - } else if (params.model.empty()) { - params.model = DEFAULT_MODEL_PATH; + } else if (model.empty()) { + model = DEFAULT_MODEL_PATH; } } @@ -276,7 +280,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); } - common_params_handle_model_default(params); + // TODO: refactor model params in a common struct + common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file); + common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file); if (params.escape) { string_process_escapes(params.prompt); @@ -1581,6 +1587,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.hf_file = value; } ).set_env("LLAMA_ARG_HF_FILE")); + add_opt(common_arg( + {"-hfrv", "--hf-repo-v"}, "REPO", + "Hugging Face model repository for the vocoder model (default: unused)", + [](common_params & params, const std::string & value) { + params.vocoder.hf_repo = value; + } + ).set_env("LLAMA_ARG_HF_REPO_V")); + add_opt(common_arg( + {"-hffv", "--hf-file-v"}, "FILE", + "Hugging Face model file for the vocoder model (default: unused)", + [](common_params & params, const std::string & value) { + params.vocoder.hf_file = value; + } + ).set_env("LLAMA_ARG_HF_FILE_V")); add_opt(common_arg( {"-hft", "--hf-token"}, "TOKEN", "Hugging Face access token (default: value from HF_TOKEN environment variable)", diff --git a/common/common.cpp b/common/common.cpp index 05d3ba766..20be92911 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1095,7 +1095,7 @@ struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_p #define CURL_MAX_RETRY 3 #define CURL_RETRY_DELAY_SECONDS 2 -static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_attempts, int retry_delay_seconds) { +static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { int remaining_attempts = max_attempts; while (remaining_attempts > 0) { @@ -1119,7 +1119,6 @@ static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_ } static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) { - // Initialize libcurl std::unique_ptr curl(curl_easy_init(), &curl_easy_cleanup); if (!curl) { @@ -1192,11 +1191,13 @@ static bool common_download_file(const std::string & url, const std::string & pa std::string etag; std::string last_modified; }; + common_load_model_from_url_headers headers; + { typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { - common_load_model_from_url_headers *headers = (common_load_model_from_url_headers *) userdata; + common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; static std::regex header_regex("([^:]+): (.*)\r\n"); static std::regex etag_regex("ETag", std::regex_constants::icase); diff --git a/common/common.h b/common/common.h index c09c4eb0d..1d2bd932c 100644 --- a/common/common.h +++ b/common/common.h @@ -175,7 +175,11 @@ struct common_params_speculative { }; struct common_params_vocoder { - std::string model = ""; // vocoder model for producing audio // NOLINT + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT + + std::string model = ""; // model path // NOLINT + std::string model_url = ""; // model url to download // NOLINT }; struct common_params { diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp index 9c3d58f89..7f36b80f0 100644 --- a/examples/tts/tts.cpp +++ b/examples/tts/tts.cpp @@ -461,7 +461,12 @@ int main(int argc, char ** argv) { model_ttc = llama_init_ttc.model; ctx_ttc = llama_init_ttc.context; - params.model = params.vocoder.model; + // TODO: refactor in a common struct + params.model = params.vocoder.model; + params.model_url = params.vocoder.model_url; + params.hf_repo = params.vocoder.hf_repo; + params.hf_file = params.vocoder.hf_file; + params.embedding = true; common_init_result llama_init_cts = common_init_from_params(params);