From 679f5137f88ebfbc1aa2e8160cfe0c9633982e21 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Wed, 12 Jun 2024 15:58:20 +0200
Subject: [PATCH] move param parser to common

---
 common/common.cpp                             |  59 +++++
 common/common.h                               |   9 +
 examples/control-vector-generator/README.md   |   9 +-
 .../control-vector-generator.cpp              | 212 +++---------------
 4 files changed, 99 insertions(+), 190 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 1591790e6..7dfd55dc0 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1576,6 +1576,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             return true;
         }
         params.out_file = argv[i];
+        params.cvector_outfile = argv[i];
         return true;
     }
     if (arg == "-ofreq" || arg == "--output-frequency") {
@@ -1610,6 +1611,55 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.i_chunk = std::stoi(argv[i]);
         return true;
     }
+    // control-vector-generator params
+    if (arg == "--completions-file") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.cvector_completions_file = argv[i];
+        return true;
+    }
+    if (arg == "--positive-file") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.cvector_positive_file = argv[i];
+        return true;
+    }
+    if (arg == "--negative-file") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.cvector_negative_file = argv[i];
+        return true;
+    }
+    if (arg == "--num-completions") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.n_completions = std::stoi(argv[i]);
+        return true;
+    }
+    if (arg == "--pca-batch") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.n_pca_batch = std::stoi(argv[i]);
+        return true;
+    }
+    if (arg == "--pca-iter") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.n_pca_iterations = std::stoi(argv[i]);
+        return true;
+    }
 #ifndef LOG_DISABLE_LOGS
     // Parse args for logging parameters
     if (log_param_single_parse(argv[i])) {
@@ -1931,6 +1981,15 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "logging",     "       --log-append",           "Don't truncate the old log file." });
 #endif // LOG_DISABLE_LOGS
 
+    options.push_back({ "control-vector-generator" });
+    options.push_back({ "control-vector-generator", "-o,  --output FNAME",    "output file (default: '%s')", params.cvector_outfile.c_str() });
+    options.push_back({ "control-vector-generator", "--positive-file FNAME",  "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
+    options.push_back({ "control-vector-generator", "--negative-file FNAME",  "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
+    options.push_back({ "control-vector-generator", "--completions-file",     "completions file (default: '%s')", params.cvector_completions_file.c_str() });
+    options.push_back({ "control-vector-generator", "--num-completions N",    "number of lines of completions file to use (default: %d)", params.n_completions });
+    options.push_back({ "control-vector-generator", "--batch-pca N",          "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
+    options.push_back({ "control-vector-generator", "--iter-pca N",           "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
+
     printf("usage: %s [options]\n", argv[0]);
 
     for (const auto & o : options) {
diff --git a/common/common.h b/common/common.h
index 2345d855e..fa44859c6 100644
--- a/common/common.h
+++ b/common/common.h
@@ -232,6 +232,15 @@ struct gpt_params {
 
     bool process_output = false; // collect data for the output tensor
     bool compute_ppl    = true;  // whether to compute perplexity
+
+    // control-vector-generator params
+    int n_completions = 64;
+    int n_pca_batch = 20;
+    int n_pca_iterations = 1000;
+    std::string cvector_outfile          = "control_vector.gguf";
+    std::string cvector_completions_file = "examples/control-vector-generator/completions.txt";
+    std::string cvector_positive_file    = "examples/control-vector-generator/positive.txt";
+    std::string cvector_negative_file    = "examples/control-vector-generator/negative.txt";
 };
 
 void gpt_params_handle_model_default(gpt_params & params);
diff --git a/examples/control-vector-generator/README.md b/examples/control-vector-generator/README.md
index b06460778..1ccb05d78 100644
--- a/examples/control-vector-generator/README.md
+++ b/examples/control-vector-generator/README.md
@@ -5,7 +5,7 @@ This example demonstrates how to generate a control vector using gguf models.
 Related PRs:
 - [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970)
 - (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880)
-- [Add control-vector-generator](https://github.com/ggerganov/llama.cpp/pull/7514)
+- [Add control-vector-generator example](https://github.com/ggerganov/llama.cpp/pull/7514)
 
 Example:
 
@@ -14,13 +14,12 @@ Example:
 ./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
 
 # With GPU
-./control-vector-generator --num-completions 2 --pca-iter 40 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
+./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
 
 # With advanced options
-# Please note that the ORDER of arguments does matter
-# example-related options (i.e., --num-completions, --pca-iter) always come before model options (i.e., -m, -ngl)
-./control-vector-generator --num-completions 128 --pca-iter 2000 --batch-pca 100 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
+./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --num-completions 128 --pca-iter 2000 --batch-pca 100
 
 # To see help message
 ./control-vector-generator -h
+# Then, have a look at "control-vector-generator" section
 ```
diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index 477592bfd..0e4bb8a02 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -168,6 +168,10 @@ struct train_context {
     int n_embd;
     int n_layers;
 
+    /* pair of prompts to be used for generating final vector */
+    std::vector<std::string> positive_entries;
+    std::vector<std::string> negative_entries;
+
     // each element of the vector correspond to one layer
     // NOTE: the last layer is discard. therefore, we will have (n_layers - 1) elements here
     // NOTE (2): v_diff is transposed from v_diff_tmp
@@ -243,23 +247,6 @@ struct train_context {
     }
 };
 
-struct ctrl_params {
-    /* default meta parameters */
-    int n_completions = 64;
-    int n_pca_batch = 20;
-    int n_pca_iterations = 1000;
-
-    /* default filepaths */
-    std::string outfile = "control_vector.gguf";
-    std::string completions_file = "examples/control-vector-generator/completions.txt";
-    std::string positive_prompts_file = "examples/control-vector-generator/positive.txt";
-    std::string negative_prompts_file = "examples/control-vector-generator/negative.txt";
-
-    /* pair of prompts to be used for generating final vector */
-    std::vector<std::string> positive_entries;
-    std::vector<std::string> negative_entries;
-};
-
 struct tokenized_prompt {
     std::vector<llama_token> tokens_pos;
     std::vector<llama_token> tokens_neg;
@@ -293,148 +280,6 @@ static std::string to_string(const T & val) {
     return ss.str();
 }
 
-static void print_usage(const char * executable) {
-    struct ctrl_params defaults;
-    printf("\n");
-    printf("usage: %s [options] -m <model> [gpt-opts]", executable);
-    printf("\n");
-    printf("Creates a GGUF control vector for a given model.");
-    printf("\n");
-    printf("options:\n");
-    printf("  -h,  --help                 show this help message and exit\n");
-    printf("  -o,  --outfile FNAME        output file\n");
-    printf("                                default: %s\n", defaults.outfile.c_str());
-    printf("  -pf, --positive-file FNAME  positive prompts file, one prompt per line\n");
-    printf("                                default: %s\n", defaults.positive_prompts_file.c_str());
-    printf("  -nf, --negative-file FNAME  negative prompts file, one prompt per line\n");
-    printf("                                default: %s\n", defaults.negative_prompts_file.c_str());
-    printf("  -cf, --completions-file     completions file\n");
-    printf("                                default: %s\n", defaults.completions_file.c_str());
-    printf("  -nc, --num-completions N    number of lines of completions file to use\n");
-    printf("                                default: %d\n", defaults.n_completions);
-    printf("  --batch-pca N               batch size used for PCA. Larger batch runs faster, but uses more memory\n");
-    printf("                                default: %d\n", defaults.n_pca_batch);
-    printf("  --iter-pca N                number of iterations used for PCA\n");
-    printf("                                default: %d\n", defaults.n_pca_iterations);
-    printf("\n");
-    printf("gpt-opts:\n");
-    printf("  -m, --model  FNAME          path to model file\n");
-    printf("  -ngl,  --gpu-layers N       number of layers to offload to GPU\n");
-    printf("  ...other options from main\n");
-    printf("\n");
-}
-
-static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
-    std::string arg;
-    const std::string arg_prefix = "-";
-    // hack to skip ctrlvec args in gpt_parse_params but we'll leave it as is
-    int skipme = 0;
-
-    for(int arg_idx = 1; arg_idx < argc; ++arg_idx) {
-        arg = argv[arg_idx];
-        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
-            std::replace(arg.begin(), arg.end(), '_', '-');
-        }
-
-        if (arg == "-h" || arg == "--help") {
-            print_usage(argv[0]);
-            exit(0);
-        }
-        if (arg == "--version") {
-            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
-            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
-            exit(0);
-        }
-        if (arg == "--outfile" || arg == "-o") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                params.outfile = argv[arg_idx];
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--completions-file" || arg == "-cf") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                params.completions_file = argv[arg_idx];
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--positive-file" || arg == "-pf") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                params.positive_prompts_file = argv[arg_idx];
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--negative-file" || arg == "-nf") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                params.negative_prompts_file = argv[arg_idx];
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--num-completions" || arg == "-nc") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                try {
-                    params.n_completions = std::stoi(argv[arg_idx]);
-                }
-                catch (const std::invalid_argument & ex) {
-                    throw std::invalid_argument("error: invalid argument for " + arg);
-                }
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--pca-batch") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                try {
-                    params.n_pca_batch = std::stoi(argv[arg_idx]);
-                }
-                catch (const std::invalid_argument & ex) {
-                    throw std::invalid_argument("error: invalid argument for " + arg);
-                }
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        if (arg == "--pca-iter") {
-            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
-                try {
-                    params.n_pca_iterations = std::stoi(argv[arg_idx]);
-                }
-                catch (const std::invalid_argument & ex) {
-                    throw std::invalid_argument("error: invalid argument for " + arg);
-                }
-                skipme += 2;
-            } else {
-                throw std::invalid_argument("error: missing argument for " + arg);
-            }
-        }
-        // TODO it might be nice QoL to have single positive/negative args
-        // we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
-    }
-    return skipme;
-}
-
-static int ctrlvec_params_parse(int argc, char ** argv, ctrl_params & params) {
-    int skipme = 0;
-    try {
-        skipme = ctrlvec_params_parse_ex(argc, argv, params);
-    }
-    catch (const std::invalid_argument & ex) {
-        fprintf(stderr, "%s\n", ex.what());
-        print_usage(argv[0]);
-        exit(EXIT_FAILURE);
-    }
-    return skipme;
-}
-
 static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool skip_empty_lines = false) {
     std::vector<std::string> output;
     std::ifstream file(path);
@@ -508,10 +353,10 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
  * Load prompt files and completion file.
  * Then format each pair of prompt + completion to make an entry.
  */
-static int prepare_entries(ctrl_params & cparams) {
+static int prepare_entries(gpt_params & params, train_context & ctx_train) {
     // load prompts
-    std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(cparams.positive_prompts_file);
-    std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(cparams.negative_prompts_file);
+    std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(params.cvector_positive_file);
+    std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(params.cvector_negative_file);
     if (positive_prompts.size() != negative_prompts.size()) {
         fprintf(stderr, "number of positive and negative prompts must be equal\n");
         return 1;
@@ -522,7 +367,7 @@ static int prepare_entries(ctrl_params & cparams) {
     }
 
     // create templated prompts
-    std::vector<std::string> completions = ctrlvec_load_prompt_file(cparams.completions_file, false);
+    std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
     auto format_template = [](std::string persona, std::string suffix) {
         //const std::string user_tag = "[INST]";
         //const std::string asst_tag = "[/INST]";
@@ -531,34 +376,28 @@ static int prepare_entries(ctrl_params & cparams) {
         return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
     };
     for (size_t i = 0; i < positive_prompts.size(); ++i) {
-        for (int j = 0; j < std::min((int) completions.size(), cparams.n_completions); ++j) {
+        for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
             // TODO replicate the truncations done by the python implementation
-            cparams.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
-            cparams.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
+            ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
+            ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
         }
     }
     return 0;
 }
 
 int main(int argc, char ** argv) {
-    ctrl_params cparams;
-
-    int skipme = ctrlvec_params_parse(argc, argv, cparams);
-    argc -= skipme;
-    argv += skipme;
-
     gpt_params params;
+
     if (!gpt_params_parse(argc, argv, params)) {
+        gpt_params_print_usage(argc, argv, params);
         return 1;
     }
 
-    if (cparams.n_pca_iterations % cparams.n_pca_batch != 0) {
+    if (params.n_pca_iterations % params.n_pca_batch != 0) {
         fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n");
         return 1;
     }
 
-    // load and prepare entries for training
-    prepare_entries(cparams);
 
     callback_data cb_data;
 
@@ -584,27 +423,30 @@ int main(int argc, char ** argv) {
     char model_hint[128];
     llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
 
+    // init train_context
+    train_context ctx_train(n_embd, n_layers);
+
+    // load and prepare entries for training
+    prepare_entries(params, ctx_train);
+
     // we have to pretokenize everything because otherwise we don't know how much overhead to allocate ctx_diffs_wrapped
     std::vector<tokenized_prompt> tokenized_prompts;
     size_t n_total_tokens = 0;
-    for (size_t i = 0; i < cparams.positive_entries.size(); ++i) {
-        tokenized_prompt t(ctx, cparams.positive_entries[i], cparams.negative_entries[i]);
+    for (size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
+        tokenized_prompt t(ctx, ctx_train.positive_entries[i], ctx_train.negative_entries[i]);
         n_total_tokens += 2 * t.max_seq_len;
         tokenized_prompts.push_back(std::move(t));
     }
 
     std::cout << "n_total_tokens: " << n_total_tokens << std::endl;
 
-    // init train_context
-    train_context ctx_train(n_embd, n_layers);
-
-    for(size_t i = 0; i < cparams.positive_entries.size(); ++i) {
+    for(size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
         tokenized_prompt t = tokenized_prompts[i];
         cb_data.n_layers = n_layers;
         cb_data.n_tokens = t.max_seq_len;
 
         printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n",
-            (int) i+1, (int) cparams.positive_entries.size(),
+            (int) i+1, (int) ctx_train.positive_entries.size(),
             tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
             tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
             (int) t.max_seq_len);
@@ -635,12 +477,12 @@ int main(int argc, char ** argv) {
     // run PCA
     PCA::pca_params pca_params;
     pca_params.n_threads = params.n_threads;
-    pca_params.n_batch = cparams.n_pca_batch;
-    pca_params.n_iterations = cparams.n_pca_iterations;
+    pca_params.n_batch = params.n_pca_batch;
+    pca_params.n_iterations = params.n_pca_iterations;
     PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
 
     // write output vectors to gguf
-    export_gguf(ctx_train.v_final, cparams.outfile, model_hint);
+    export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
 
     llama_backend_free();