From 40a961db6031d8a14ad1e150270747d4a49e637b Mon Sep 17 00:00:00 2001
From: Olivier Chafik
Date: Fri, 26 Apr 2024 00:40:45 +0100
Subject: [PATCH] args: default --model to models/ + filename from --model-url or --hf-file (or else legacy models/7B/ggml-model-f16.gguf)

---
 common/common.cpp | 22 +++++++++++++++++++---
 common/common.h   |  2 +-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 97f55b053..2e7ddce12 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1338,9 +1338,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
-    // short-hand to avoid specifying --hf-file -> default it to --model
-    if (!params.hf_repo.empty() && params.hf_file.empty()) {
-        params.hf_file = params.model;
+    if (!params.hf_repo.empty()) {
+        // short-hand to avoid specifying --hf-file -> default it to --model
+        if (params.hf_file.empty()) {
+            if (params.model.empty()) {
+                throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n");
+            }
+            params.hf_file = params.model;
+        } else if (params.model.empty()) {
+            params.model = "models/" + string_split(params.hf_file, '/').back();
+        }
+    } else if (!params.model_url.empty()) {
+        if (params.model.empty()) {
+            auto f = string_split(params.model_url, '#').front();
+            f = string_split(f, '?').front();
+            f = string_split(f, '/').back();
+            params.model = "models/" + f;
+        }
+    } else if (params.model.empty()) {
+        params.model = "models/7B/ggml-model-f16.gguf";
     }
 
     if (params.escape) {
diff --git a/common/common.h b/common/common.h
index 87361e8e9..ff0eed055 100644
--- a/common/common.h
+++ b/common/common.h
@@ -92,7 +92,7 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model = "models/7B/ggml-model-f16.gguf"; // model path
+    std::string model = ""; // model path
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string model_url = ""; // model url to download