args: define DEFAULT_MODEL_PATH + update cli docs

2024-04-26 14:33:59 +01:00 · 2024-04-26 14:33:59 +01:00 · e55dfde3b0
commit e55dfde3b0
parent 9c0db4dd9d
5 changed files with 8 additions and 6 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1329,7 +1329,7 @@ void gpt_params_handle_model_default(gpt_params & params) {
            params.model =  "models/" + f;
        }
    } else if (params.model.empty()) {
-        params.model =  "models/7B/ggml-model-f16.gguf";
+        params.model = DEFAULT_MODEL_PATH;
    }
 }

@@ -1554,7 +1554,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    printf("  --control-vector-layer-range START END\n");
    printf("                        layer range to apply the control vector(s) to, start and end inclusive\n");
    printf("  -m FNAME, --model FNAME\n");
-    printf("                        model path (default: %s)\n", params.model.c_str());
+    printf("                        model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH);
    printf("  -md FNAME, --model-draft FNAME\n");
    printf("                        draft model for speculative decoding (default: unused)\n");
    printf("  -mu MODEL_URL, --model-url MODEL_URL\n");
@@ -2660,7 +2660,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
    fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau);
    fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta);
    fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false");
-    fprintf(stream, "model: %s # default: models/7B/ggml-model.bin\n", params.model.c_str());
+    fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH);
    fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
    fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false");
    fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers);
--- a/common/common.h
+++ b/common/common.h
@@ -31,6 +31,8 @@
    fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET);    \
 } while(0)

+#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
+
 // build info
 extern int LLAMA_BUILD_NUMBER;
 extern char const *LLAMA_COMMIT;
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -66,7 +66,7 @@ main.exe -m models\7B\ggml-model.bin --ignore-eos -n -1 --random-prompt

 In this section, we cover the most commonly used options for running the `main` program with the LLaMA models:

-   `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.bin`).
+-   `-m FNAME, --model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`; inferred from `--model-url` if set).
 -   `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf).
 -   `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses.
 -   `-ins, --instruct`: Run the program in instruction mode, which is particularly useful when working with Alpaca models.
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -50,7 +50,7 @@ static void quantize_stats_print_usage(int /*argc*/, char ** argv) {
    fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h, --help            show this help message and exit\n");
    fprintf(stderr, "  -m FNAME, --model FNAME\n");
-    fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
+    fprintf(stderr, "                        model path (default: %s)\n", DEFAULT_MODEL_PATH);
    fprintf(stderr, "  -r, --reference\n");
    fprintf(stderr, "                        use reference implementation (default: false)\n");
    fprintf(stderr, "  -v, --verbose\n");
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2332,7 +2332,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
        printf("                            disable KV offload\n");
    }
    printf("  -m FNAME, --model FNAME\n");
-    printf("                            model path (default: %s)\n", params.model.c_str());
+    printf("                            model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH);
    printf("  -mu MODEL_URL, --model-url MODEL_URL\n");
    printf("                            model download url (default: unused)\n");
    printf("  -hfr REPO, --hf-repo REPO\n");