From 961bd19da102c8dec63a23acc01976bb84ed2565 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Fri, 6 Sep 2024 13:42:20 +0200
Subject: [PATCH] add comments

---
 common/common.cpp | 22 +++++++++++++-------
 common/common.h   | 52 +++++++++++++++++++++++++++--------------------
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index d04fc5f7f..d8d2caac3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -456,6 +456,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params, std::vecto
         params.kv_overrides.back().key[0] = 0;
     }
 
+    if (params.seed == LLAMA_DEFAULT_SEED) {
+        params.seed = time(NULL);
+        sparams.seed = params.seed;
+    }
+
     return true;
 }
 
@@ -468,7 +473,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params, std::vector<
     }
 }
 
-void gpt_params_print_usage(std::vector<llama_arg> & options) {
+void gpt_params_print_usage(gpt_params & params, std::vector<llama_arg> & options) {
     auto print_options = [](std::vector<llama_arg *> & options) {
         for (llama_arg * opt : options) {
             printf("%s", opt->to_string().c_str());
@@ -622,14 +627,16 @@ void gpt_params_print_usage(std::vector<llama_arg> & options) {
     std::vector<llama_arg *> common_options;
     std::vector<llama_arg *> specific_options;
     for (auto & opt : options) {
-        if (opt.in_example(LLAMA_EXAMPLE_COMMON)) {
-            common_options.push_back(&opt);
-        } else {
+        // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example
+        if (opt.in_example(params.curr_ex)) {
             specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
         }
     }
     printf("----- common options -----\n\n");
     print_options(common_options);
+    // TODO: maybe convert enum llama_example to string
    printf("\n\n----- example-specific options -----\n\n");
     print_options(specific_options);
 }
@@ -641,6 +648,7 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
 std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex, std::function<void(int, char **)> print_usage) {
     std::vector<llama_arg> options;
     params.print_usage = print_usage;
+    params.curr_ex = ex;
 
     llama_sampling_params & sparams = params.sparams;
     std::string sampler_type_chars;
@@ -1772,14 +1780,14 @@ std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example
         [&params](std::string value) {
             params.lora_adapters.push_back({ std::string(value), 1.0 });
         }
-    ));
+    ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(llama_arg(
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [&params](std::string fname, std::string scale) {
             params.lora_adapters.push_back({ fname, std::stof(scale) });
         }
-    ));
+    ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(llama_arg(
         {"--control-vector"}, "FNAME",
         "add a control vector\nnote: this argument can be repeated to add multiple control vectors",
diff --git a/common/common.h b/common/common.h
index 7536120fc..8f5e3a96a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -63,6 +63,24 @@ int32_t cpu_get_num_math();
 // CLI argument parsing
 //
 
+enum llama_example {
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_INFILL,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
+
+    LLAMA_EXAMPLE_COUNT,
+};
+
 // dimensionality reduction methods, used by cvector-generator
 enum dimre_method {
     DIMRE_METHOD_PCA,
@@ -79,6 +97,7 @@ struct cpu_params {
 };
 
 struct gpt_params {
+    enum llama_example curr_ex = LLAMA_EXAMPLE_COMMON;
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
     int32_t n_predict = -1; // new tokens to predict
@@ -125,7 +144,7 @@ struct gpt_params {
     // // sampling parameters
     struct llama_sampling_params sparams;
 
-    std::string model       = "model.gguf"; // model path
+    std::string model       = ""; // model path
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string model_url   = ""; // model url to download
@@ -280,24 +299,6 @@ struct gpt_params {
     std::string lora_outfile = "ggml-lora-merged-f16.gguf";
 };
 
-enum llama_example {
-    LLAMA_EXAMPLE_COMMON,
-    LLAMA_EXAMPLE_SPECULATIVE,
-    LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
-    LLAMA_EXAMPLE_EMBEDDING,
-    LLAMA_EXAMPLE_PERPLEXITY,
-    LLAMA_EXAMPLE_RETRIEVAL,
-    LLAMA_EXAMPLE_PASSKEY,
-    LLAMA_EXAMPLE_IMATRIX,
-    LLAMA_EXAMPLE_BENCH,
-    LLAMA_EXAMPLE_SERVER,
-    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
-    LLAMA_EXAMPLE_EXPORT_LORA,
-
-    LLAMA_EXAMPLE_COUNT,
-};
-
 struct llama_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
     std::vector<std::string> args;
@@ -352,11 +353,18 @@ struct llama_arg {
     std::string to_string();
 };
 
+// initialize list of options (arguments) that can be used by the current example
 std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex);
+// optionally, we can provide "print_usage" to print example usage
 std::vector<llama_arg> gpt_params_parser_init(gpt_params & params, llama_example ex, std::function<void(int, char **)> print_usage);
-bool gpt_params_parse    (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
-bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
-void gpt_params_print_usage(std::vector<llama_arg> & options);
+
+// parse input arguments from CLI
+// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
+bool gpt_params_parse   (int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params, std::vector<llama_arg> & options);
+
+// print full usage message; it will be called internally by gpt_params_parse() if "-h" is set
+void gpt_params_print_usage(gpt_params & params, std::vector<llama_arg> & options);
 
 void gpt_params_handle_model_default(gpt_params & params);
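
Usage sketch (not part of the patch): with this change, an example passes its own LLAMA_EXAMPLE_* to gpt_params_parser_init(), which records it in params.curr_ex so gpt_params_print_usage() can split common vs. example-specific options. A minimal hypothetical caller might look like the code below; the print_usage callback signature (int, char **) is an assumption recovered from the params.print_usage assignment above, and the file itself is illustrative, not from this patch.

    // hypothetical example entry point, sketching the API introduced by this patch
    #include "common.h"

    #include <cstdio>

    // usage callback; the (int, char **) signature is assumed from params.print_usage
    static void print_usage(int /* argc */, char ** argv) {
        printf("usage: %s [options]\n", argv[0]);
    }

    int main(int argc, char ** argv) {
        gpt_params params;
        // register the options visible to this example; this also sets params.curr_ex
        auto options = gpt_params_parser_init(params, LLAMA_EXAMPLE_MAIN, print_usage);
        if (!gpt_params_parse(argc, argv, params, options)) {
            // on an invalid value, usage for the offending argument was already printed
            return 1;
        }
        // ... run the example using params ...
        return 0;
    }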