diff --git a/common/arg.cpp b/common/arg.cpp index 4a9d8ecd0..74803c6a9 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -135,7 +135,10 @@ std::string common_arg::to_string() { * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) + * * Return pair of <repo, file> (with "repo" already having tag removed) + * + * Note: we use the Ollama-compatible HF API, but do not use the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. */ static std::pair common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) { auto parts = string_split(hf_repo_with_tag, ':'); @@ -219,7 +222,7 @@ static void common_params_handle_model_default( auto auto_detected = common_get_hf_file(hf_repo, hf_token); hf_repo = auto_detected.first; hf_file = auto_detected.second; - printf("%s: using hf_file = %s\n", __func__, hf_file.c_str()); + LOG_INF("%s: using hf_file = %s\n", __func__, hf_file.c_str()); } catch (std::exception & e) { fprintf(stderr, "%s: %s\n", __func__, e.what()); exit(1); @@ -227,7 +230,9 @@ static void common_params_handle_model_default( } else { hf_file = model; } - } else if (model.empty()) { + } + // make sure model path is present (for caching purposes) + if (model.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs std::string filename = hf_repo + "_" + hf_file; // to make sure we don't have any slashes in the filename