From e55dfde3b078c9146c1423cfd927f3188cc24a0b Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 26 Apr 2024 14:33:59 +0100 Subject: [PATCH] args: define DEFAULT_MODEL_PATH + update cli docs --- common/common.cpp | 6 +++--- common/common.h | 2 ++ examples/main/README.md | 2 +- examples/quantize-stats/quantize-stats.cpp | 2 +- examples/server/server.cpp | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 8cede30b0..525c5ec15 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1329,7 +1329,7 @@ void gpt_params_handle_model_default(gpt_params & params) { params.model = "models/" + f; } } else if (params.model.empty()) { - params.model = "models/7B/ggml-model-f16.gguf"; + params.model = DEFAULT_MODEL_PATH; } } @@ -1554,7 +1554,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --control-vector-layer-range START END\n"); printf(" layer range to apply the control vector(s) to, start and end inclusive\n"); printf(" -m FNAME, --model FNAME\n"); - printf(" model path (default: %s)\n", params.model.c_str()); + printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH); printf(" -md FNAME, --model-draft FNAME\n"); printf(" draft model for speculative decoding (default: unused)\n"); printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); @@ -2660,7 +2660,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau); fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta); fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false"); - fprintf(stream, "model: %s # default: models/7B/ggml-model.bin\n", params.model.c_str()); + fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH); fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str()); fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false"); fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers); diff --git a/common/common.h b/common/common.h index d828c3e7d..f94cc71e7 100644 --- a/common/common.h +++ b/common/common.h @@ -31,6 +31,8 @@ fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ } while(0) +#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" + // build info extern int LLAMA_BUILD_NUMBER; extern char const *LLAMA_COMMIT; diff --git a/examples/main/README.md b/examples/main/README.md index 649f4e0f3..e7a38743c 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -66,7 +66,7 @@ main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt In this section, we cover the most commonly used options for running the `main` program with the LLaMA models: -- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`). +- `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`; inferred from `--model-url` if set). - `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf). - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses. - `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models. diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 1d05f1391..86e03a80b 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -50,7 +50,7 @@ static void quantize_stats_print_usage(int /*argc*/, char ** argv) { fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -m FNAME, --model FNAME\n"); - fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); + fprintf(stderr, " model path (default: %s)\n", DEFAULT_MODEL_PATH); fprintf(stderr, " -r, --reference\n"); fprintf(stderr, " use reference implementation (default: false)\n"); fprintf(stderr, " -v, --verbose\n"); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index cf57e96e9..9a822b709 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2332,7 +2332,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co printf(" disable KV offload\n"); } printf(" -m FNAME, --model FNAME\n"); - printf(" model path (default: %s)\n", params.model.c_str()); + printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH); printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); printf(" model download url (default: unused)\n"); printf(" -hfr REPO, --hf-repo REPO\n");