commit 9ae4d8a96d (parent 6a3a2fcc5b)

    migrated

26 changed files with 1828 additions and 1770 deletions

common/common.cpp: 2970 changed lines (file diff suppressed because it is too large; the hunks below are from the remaining files)

@@ -170,6 +170,7 @@ struct gpt_params {
 
     bool kl_divergence = false; // compute KL divergence
 
+    std::function<void(int, char **)> print_usage = nullptr; // print example-specific usage and example
     bool usage = false; // print usage
     bool use_color = false; // use color to distinguish generations and inputs
     bool special = false; // enable special token output

@@ -279,73 +280,67 @@ struct gpt_params {
 };
 
 enum llama_example {
-    LLAMA_EXAMPLE_ALL,
-    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
     LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
 
+    LLAMA_EXAMPLE_COUNT,
 };
 
 struct llama_arg {
-    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_ALL};
+    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<std::string> args;
-    std::string value_ex;
+    std::string value_hint;   // help text or example for arg value
+    std::string value_hint_2; // for second arg value
     std::string env;
     std::string help;
-    std::function<bool(void)> handler_void = nullptr;
-    std::function<bool(std::string)> handler_string = nullptr;
-    std::function<bool(bool)> handler_bool = nullptr;
-    std::function<bool(int)> handler_int = nullptr;
-    std::function<bool(float)> handler_float = nullptr;
+    std::function<void(void)> handler_void = nullptr;
+    std::function<void(std::string)> handler_string = nullptr;
+    std::function<void(std::string, std::string)> handler_str_str = nullptr;
+    std::function<void(int)> handler_int = nullptr;
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(std::string)> handler) : args(args), help(help), handler_string(handler) {}
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string help, std::function<void(std::string)> handler) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(bool)> handler) : args(args), help(help), handler_bool(handler) {}
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string help, std::function<void(int)> handler) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
 
-    llama_arg(std::vector<std::string> args, std::string help, std::function<bool(void)> handler) : args(args), help(help), handler_void(handler) {}
+    llama_arg(std::vector<std::string> args, std::string help, std::function<void(void)> handler) : args(args), help(help), handler_void(handler) {}
 
-    llama_arg & set_examples(std::set<enum llama_example> _examples) {
-        examples = std::move(_examples);
+    // support 2 values for arg
+    llama_arg(std::vector<std::string> args, std::string value_hint, std::string value_hint_2, std::string help, std::function<void(std::string, std::string)> handler) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
+
+    llama_arg & set_examples(std::set<enum llama_example> examples) {
+        this->examples = std::move(examples);
         return *this;
     }
 
-    llama_arg & set_value_ex(std::string _value_ex) {
-        value_ex = std::move(_value_ex);
+    llama_arg & set_env(std::string env) {
+        this->env = std::move(env);
         return *this;
     }
 
-    llama_arg & set_env(std::string _env) {
-        env = _env;
-        return *this;
-    }
-
-    // utility function
-    static std::vector<std::string> break_str_into_lines(std::string input, size_t max_char_per_line) {
-        std::vector<std::string> result;
-        std::istringstream iss(input);
-        std::string word, line;
-        while (iss >> word) {
-            if (line.length() + !line.empty() + word.length() > max_char_per_line) {
-                if (!line.empty()) result.push_back(line);
-                line = word;
-            } else {
-                line += (!line.empty() ? " " : "") + word;
-            }
-        }
-        if (!line.empty()) result.push_back(line);
-        return result;
+    bool in_example(enum llama_example ex) {
+        return examples.find(ex) != examples.end();
     }
 };
 
-std::vector<llama_arg> gpt_params_parser_register(gpt_params & params);
-bool gpt_params_parser_run(int argc, char ** argv, std::vector<llama_arg> & options);
+std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex);
+std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex, std::function<void(int, char **)> print_usage);
+bool gpt_params_parse    (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+void gpt_params_print_usage(std::vector<llama_arg> & options);
 
-void gpt_params_parse_from_env(gpt_params & params);
 void gpt_params_handle_model_default(gpt_params & params);
 
-bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
-bool gpt_params_parse (int argc, char ** argv, gpt_params & params);
-bool gpt_params_find_arg (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
-void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
-
 std::string gpt_params_get_system_info(const gpt_params & params);
 
 bool parse_cpu_range(const std::string& range, bool(&boolmask)[GGML_MAX_N_THREADS]);

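Note: the option table itself is built in common/common.cpp, whose diff is suppressed above. As a rough sketch of how the new header API fits together (illustrative values, not lines from this commit), a registration inside gpt_params_parser_init() could look like:

    // void(void) overload: a plain flag with no value
    options.push_back(llama_arg(
        {"--kl-divergence"},
        "compute KL divergence",
        [&params]() { params.kl_divergence = true; }
    ).set_examples({LLAMA_EXAMPLE_PERPLEXITY}));

    // void(std::string) overload: an option taking one value, with a value
    // hint for the help text and an (assumed) environment-variable fallback
    options.push_back(llama_arg(
        {"-m", "--model"}, "FNAME",
        "model path",
        [&params](std::string value) { params.model = value; }
    ).set_env("LLAMA_ARG_MODEL"));

A parser like gpt_params_parse() can then match each argv token against args, dispatch to whichever handler_* is set, and use in_example(ex) to restrict which options a given example accepts.
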
@@ -28,9 +28,7 @@ static std::vector<int> parse_list(char * p) {
     return ret;
 }
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s -m model.gguf -c 2048 -b 2048 -ub 512 -npp 128,256,512 -ntg 128,256 -npl 1,2,4,8,16,32 [-pps]\n", argv[0]);
     LOG_TEE("\n");

@@ -39,8 +37,8 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_BENCH, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

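This two-step entry point recurs in every example below: build the option list for the example's enum value, then parse against it. In sketch form (printing usage on a failed parse is presumably handled inside gpt_params_parse now, since the explicit print_usage calls after a failure are dropped everywhere):

    int main(int argc, char ** argv) {
        gpt_params params;

        // 1) collect the llama_arg definitions relevant to this example,
        //    optionally wiring in an example-specific print_usage callback
        auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_BENCH, print_usage);

        // 2) parse argv against those options; handlers write into params
        if (!gpt_params_parse(argc, argv, params, options)) {
            return 1;
        }
        // ... example-specific work ...
    }
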
@@ -7,9 +7,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s -m model.gguf -p \"Hello my name is\" -n 32 -np 4\n", argv[0]);
     LOG_TEE("\n");

@@ -21,8 +19,8 @@ int main(int argc, char ** argv) {
     params.prompt = "Hello my name is";
     params.n_predict = 32;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -35,9 +35,7 @@ static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
     return ret;
 }
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
     printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);

@@ -390,8 +388,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_CVECTOR_GENERATOR, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -79,8 +79,8 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_EMBEDDING);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -144,8 +144,8 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -391,9 +391,7 @@ struct lora_merge_ctx {
     }
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     printf("\nexample usage:\n");
     printf("\n %s -m base-model.gguf --lora lora-file.gguf -o merged-model-f16.gguf\n", argv[0]);
     printf("\nNOTE: output model is F16\n");

@@ -403,8 +401,8 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_EXPORT_LORA, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -157,8 +157,8 @@ static std::string gritlm_instruction(const std::string & instruction) {
 int main(int argc, char * argv[]) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -17,9 +17,7 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s \\\n"
             " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \\\n"

@@ -579,8 +577,8 @@ int main(int argc, char ** argv) {
     params.logits_all = true;
     params.verbosity = 1;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -106,8 +106,8 @@ int main(int argc, char ** argv) {
     llama_sampling_params & sparams = params.sparams;
     g_params = &params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_INFILL);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -112,9 +112,7 @@ struct llava_context {
     struct llama_model * model = NULL;
 };
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\n example usage:\n");
     LOG_TEE("\n %s -m <llava-v1.5-7b/ggml-model-q5_k.gguf> --mmproj <llava-v1.5-7b/mmproj-model-f16.gguf> --image <path/to/an/image.jpg> --image <path/to/another/image.jpg> [--temp 0.1] [-p \"describe the image in detail.\"]\n", argv[0]);
     LOG_TEE("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");

@@ -280,8 +278,8 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -293,7 +291,7 @@ int main(int argc, char ** argv) {
 #endif // LOG_DISABLE_LOGS
 
     if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
-        print_usage(argc, argv, {});
+        print_usage(argc, argv);
         return 1;
     }
     auto model = llava_init(&params);

@@ -253,8 +253,8 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        show_additional_info(argc, argv);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, show_additional_info);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -266,7 +266,6 @@ int main(int argc, char ** argv) {
 #endif // LOG_DISABLE_LOGS
 
     if (params.mmproj.empty() || (params.image.empty())) {
-        gpt_params_print_usage(argc, argv, params);
         show_additional_info(argc, argv);
         return 1;
     }

@@ -37,8 +37,8 @@ struct ngram_container {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -13,8 +13,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -15,8 +15,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -14,8 +14,8 @@
 int main(int argc, char ** argv){
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -131,12 +131,9 @@ static std::string chat_add_and_format(struct llama_model * model, std::vector<l
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
-    auto options = gpt_params_parser_register(params);
-    gpt_params_parser_run(argc, argv, options);
-    return 0;
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_MAIN);
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -100,8 +100,8 @@ int main(int argc, char ** argv) {
 
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -6,9 +6,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s -m model.gguf --junk 250 --pos 90 --keep 32 --grp-attn-n 2 [--seed 1234]\n", argv[0]);
     LOG_TEE("\n");

@@ -21,8 +19,8 @@ int main(int argc, char ** argv) {
     params.n_keep = 32;
     params.i_pos = -1;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_PASSKEY, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -1967,8 +1967,8 @@ int main(int argc, char ** argv) {
     params.n_ctx = 512;
     params.logits_all = true;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_PERPLEXITY);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -4,9 +4,7 @@
 #include <algorithm>
 #include <fstream>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .\n", argv[0]);
     LOG_TEE("\n");

@@ -113,8 +111,8 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_RETRIEVAL, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -10,8 +10,8 @@ int main(int argc, char ** argv) {
 
     params.prompt = "The quick brown fox";
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -2491,14 +2491,11 @@ int main(int argc, char ** argv) {
     // own arguments required by this example
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_SERVER);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 
-    // parse arguments from environment variables
-    gpt_params_parse_from_env(params);
-
     // TODO: not great to use extern vars
     server_log_json = params.log_json;
     server_verbose = params.verbosity > 0;

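The standalone gpt_params_parse_from_env() pass is gone here, and its declaration is removed from the header above; environment-variable handling now presumably hangs off llama_arg::set_env instead. A plausible sketch of that fallback during parsing, assuming string-valued options (the real logic lives in the suppressed common/common.cpp diff):

    #include <cstdlib>

    // hypothetical helper, not part of this commit's visible hunks
    static void apply_env_defaults(std::vector<llama_arg> & options) {
        for (auto & opt : options) {
            if (opt.env.empty()) {
                continue;
            }
            // if the linked environment variable is set, feed its value
            // through the option's string handler as a default
            if (const char * val = std::getenv(opt.env.c_str())) {
                if (opt.handler_string) {
                    opt.handler_string(val);
                }
            }
        }
    }
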
@@ -6,9 +6,7 @@
 #include <string>
 #include <vector>
 
-static void print_usage(int argc, char ** argv, const gpt_params & params) {
-    gpt_params_print_usage(argc, argv, params);
-
+static void print_usage(int, char ** argv) {
     LOG_TEE("\nexample usage:\n");
     LOG_TEE("\n %s -m model.gguf -p \"Hello my name is\" -n 32\n", argv[0]);
     LOG_TEE("\n");

@@ -20,8 +18,8 @@ int main(int argc, char ** argv) {
     params.prompt = "Hello my name is";
     params.n_predict = 32;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_COMMON, print_usage);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }
 

@@ -27,8 +27,8 @@ struct seq_draft {
 int main(int argc, char ** argv) {
     gpt_params params;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_SPECULATIVE);
+    if (!gpt_params_parse(argc, argv, params, options)) {
         return 1;
     }