move param parser to common

2024-06-12 15:58:20 +02:00 · 2024-06-12 15:58:20 +02:00 · 679f5137f8
commit 679f5137f8
parent f54cb8e307
4 changed files with 99 additions and 190 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -1576,6 +1576,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
            return true;
        }
        params.out_file = argv[i];
        params.cvector_outfile = argv[i];
        return true;
    }
    if (arg == "-ofreq" || arg == "--output-frequency") {
@ -1610,6 +1611,55 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
        params.i_chunk = std::stoi(argv[i]);
        return true;
    }
    // control-vector-generator params
    if (arg == "--completions-file") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.cvector_completions_file = argv[i];
        return true;
    }
    if (arg == "--positive-file") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.cvector_positive_file = argv[i];
        return true;
    }
    if (arg == "--negative-file") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.cvector_negative_file = argv[i];
        return true;
    }
    if (arg == "--num-completions") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.n_completions = std::stoi(argv[i]);
        return true;
    }
    if (arg == "--pca-batch") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.n_pca_batch = std::stoi(argv[i]);
        return true;
    }
    if (arg == "--pca-iter") {
        if (++i >= argc) {
            invalid_param = true;
            return true;
        }
        params.n_pca_iterations = std::stoi(argv[i]);
        return true;
    }
 #ifndef LOG_DISABLE_LOGS
    // Parse args for logging parameters
    if (log_param_single_parse(argv[i])) {
@ -1931,6 +1981,15 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
    options.push_back({ "logging",     "       --log-append",           "Don't truncate the old log file." });
 #endif // LOG_DISABLE_LOGS
    // control-vector-generator options.
    // The flag spellings shown here must match the ones gpt_params_find_arg accepts
    // ("--pca-batch" / "--pca-iter"), otherwise users copy a flag from the help that is rejected.
    options.push_back({ "control-vector-generator" });
    options.push_back({ "control-vector-generator", "-o,  --output FNAME",      "output file (default: '%s')", params.cvector_outfile.c_str() });
    options.push_back({ "control-vector-generator", "--positive-file FNAME",    "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
    options.push_back({ "control-vector-generator", "--negative-file FNAME",    "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
    options.push_back({ "control-vector-generator", "--completions-file FNAME", "completions file (default: '%s')", params.cvector_completions_file.c_str() });
    options.push_back({ "control-vector-generator", "--num-completions N",      "number of lines of completions file to use (default: %d)", params.n_completions });
    options.push_back({ "control-vector-generator", "--pca-batch N",            "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
    options.push_back({ "control-vector-generator", "--pca-iter N",             "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
    printf("usage: %s [options]\n", argv[0]);
    for (const auto & o : options) {
--- a/common/common.h
+++ b/common/common.h
@ -232,6 +232,15 @@ struct gpt_params {
    bool process_output = false; // collect data for the output tensor
    bool compute_ppl    = true;  // whether to compute perplexity
    // control-vector-generator params
    int n_completions = 64;
    int n_pca_batch = 20;
    int n_pca_iterations = 1000;
    std::string cvector_outfile          = "control_vector.gguf";
    std::string cvector_completions_file = "examples/control-vector-generator/completions.txt";
    std::string cvector_positive_file    = "examples/control-vector-generator/positive.txt";
    std::string cvector_negative_file    = "examples/control-vector-generator/negative.txt";
 };
 void gpt_params_handle_model_default(gpt_params & params);
--- a/examples/control-vector-generator/README.md
+++ b/examples/control-vector-generator/README.md
@ -5,7 +5,7 @@ This example demonstrates how to generate a control vector using gguf models.
 Related PRs:
 - [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970)
 - (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880)
- [Add control-vector-generator](https://github.com/ggerganov/llama.cpp/pull/7514)
+- [Add control-vector-generator example](https://github.com/ggerganov/llama.cpp/pull/7514)
 Example:
@ -14,13 +14,12 @@ Example:
 ./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
 # With GPU
-./control-vector-generator --num-completions 2 --pca-iter 40 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
+./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
 # With advanced options
-# Please note that the ORDER of arguments does matter
+./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --num-completions 128 --pca-iter 2000 --pca-batch 100
 # example-related options (i.e., --num-completions, --pca-iter) always come before model options (i.e., -m, -ngl)
 ./control-vector-generator --num-completions 128 --pca-iter 2000 --pca-batch 100 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
 # To see help message
 ./control-vector-generator -h
 # Then, have a look at "control-vector-generator" section
 ```
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@ -168,6 +168,10 @@ struct train_context {
    int n_embd;
    int n_layers;
    /* pair of prompts to be used for generating final vector */
    std::vector<std::string> positive_entries;
    std::vector<std::string> negative_entries;
    // each element of the vector corresponds to one layer
    // NOTE: the last layer is discarded. therefore, we will have (n_layers - 1) elements here
    // NOTE (2): v_diff is transposed from v_diff_tmp
@ -243,23 +247,6 @@ struct train_context {
    }
 };
 // Settings for the control vector generator, together with the prompt entries built from them.
 // Every field carries its default so a default-constructed instance is ready to use.
 struct ctrl_params {
    // meta parameters
    int n_completions{64};
    int n_pca_batch{20};
    int n_pca_iterations{1000};

    // file locations
    std::string outfile{"control_vector.gguf"};
    std::string completions_file{"examples/control-vector-generator/completions.txt"};
    std::string positive_prompts_file{"examples/control-vector-generator/positive.txt"};
    std::string negative_prompts_file{"examples/control-vector-generator/negative.txt"};

    // positive/negative prompt pairs used to generate the final vector
    std::vector<std::string> positive_entries{};
    std::vector<std::string> negative_entries{};
 };
 struct tokenized_prompt {
    std::vector<llama_token> tokens_pos;
    std::vector<llama_token> tokens_neg;
@ -293,148 +280,6 @@ static std::string to_string(const T & val) {
    return ss.str();
 }
 static void print_usage(const char * executable) {
    struct ctrl_params defaults;
    printf("\n");
    printf("usage: %s [options] -m <model> [gpt-opts]", executable);
    printf("\n");
    printf("Creates a GGUF control vector for a given model.");
    printf("\n");
    printf("options:\n");
    printf("  -h,  --help                 show this help message and exit\n");
    printf("  -o,  --outfile FNAME        output file\n");
    printf("                                default: %s\n", defaults.outfile.c_str());
    printf("  -pf, --positive-file FNAME  positive prompts file, one prompt per line\n");
    printf("                                default: %s\n", defaults.positive_prompts_file.c_str());
    printf("  -nf, --negative-file FNAME  negative prompts file, one prompt per line\n");
    printf("                                default: %s\n", defaults.negative_prompts_file.c_str());
    printf("  -cf, --completions-file     completions file\n");
    printf("                                default: %s\n", defaults.completions_file.c_str());
    printf("  -nc, --num-completions N    number of lines of completions file to use\n");
    printf("                                default: %d\n", defaults.n_completions);
    printf("  --batch-pca N               batch size used for PCA. Larger batch runs faster, but uses more memory\n");
    printf("                                default: %d\n", defaults.n_pca_batch);
    printf("  --iter-pca N                number of iterations used for PCA\n");
    printf("                                default: %d\n", defaults.n_pca_iterations);
    printf("\n");
    printf("gpt-opts:\n");
    printf("  -m, --model  FNAME          path to model file\n");
    printf("  -ngl,  --gpu-layers N       number of layers to offload to GPU\n");
    printf("  ...other options from main\n");
    printf("\n");
 }
 /**
  * Scan argv for the options specific to the control vector generator and store them in `params`.
  *
  * Underscores in option names are treated as dashes. `-h`/`--help` and `--version` print their
  * output and exit the process. Arguments not recognized here are ignored so that the common gpt
  * parser can handle them afterwards.
  *
  * @param argc   number of entries in argv
  * @param argv   command-line arguments; argv[0] is the executable name
  * @param params receives the parsed values
  * @return 2 for every option/value pair consumed. The caller advances argv by this amount
  *         before running the gpt parser, which presumably only works when these options
  *         precede the gpt options (NOTE(review): confirm against the caller).
  * @throws std::invalid_argument when an option has no value or a numeric value cannot be parsed
  */
 static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
    const std::string arg_prefix = "-";
    std::string arg;
    int arg_idx = 1;
    // hack to skip ctrlvec args in gpt_parse_params but we'll leave it as is
    int skipme = 0;

    // Advance to the value of the current option. A value is missing when argv is exhausted or the
    // next entry starts with the option prefix. The comparison uses the prefix length: comparing
    // 2 characters against "-" would include the terminator and only ever match a bare "-".
    const auto take_value = [&]() -> const char * {
        if (++arg_idx >= argc || strncmp(argv[arg_idx], arg_prefix.c_str(), arg_prefix.size()) == 0) {
            throw std::invalid_argument("error: missing argument for " + arg);
        }
        skipme += 2;
        return argv[arg_idx];
    };

    // Same as take_value, but converts the value to int. std::stoi reports overflow through
    // std::out_of_range, which is mapped to the same error as a non-numeric value so that the
    // caller's std::invalid_argument handler covers both cases.
    const auto take_int = [&]() -> int {
        const char * value = take_value();
        try {
            return std::stoi(value);
        } catch (const std::invalid_argument &) {
            throw std::invalid_argument("error: invalid argument for " + arg);
        } catch (const std::out_of_range &) {
            throw std::invalid_argument("error: invalid argument for " + arg);
        }
    };

    for (; arg_idx < argc; ++arg_idx) {
        arg = argv[arg_idx];
        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
            std::replace(arg.begin(), arg.end(), '_', '-');
        }

        if (arg == "-h" || arg == "--help") {
            print_usage(argv[0]);
            exit(0);
        }
        if (arg == "--version") {
            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
            exit(0);
        }

        if (arg == "--outfile" || arg == "-o") {
            params.outfile = take_value();
        } else if (arg == "--completions-file" || arg == "-cf") {
            params.completions_file = take_value();
        } else if (arg == "--positive-file" || arg == "-pf") {
            params.positive_prompts_file = take_value();
        } else if (arg == "--negative-file" || arg == "-nf") {
            params.negative_prompts_file = take_value();
        } else if (arg == "--num-completions" || arg == "-nc") {
            params.n_completions = take_int();
        } else if (arg == "--pca-batch") {
            params.n_pca_batch = take_int();
        } else if (arg == "--pca-iter") {
            params.n_pca_iterations = take_int();
        }
        // TODO it might be nice QoL to have single positive/negative args
        // we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
    }
    return skipme;
 }
 /**
  * Wrapper around ctrlvec_params_parse_ex that turns a parse error into a message on stderr,
  * the usage text, and a failing process exit.
  *
  * @return the value returned by ctrlvec_params_parse_ex when parsing succeeds
  */
 static int ctrlvec_params_parse(int argc, char ** argv, ctrl_params & params) {
    try {
        return ctrlvec_params_parse_ex(argc, argv, params);
    } catch (const std::invalid_argument & err) {
        // report the problem, show the help and stop; this branch never returns
        fprintf(stderr, "%s\n", err.what());
        print_usage(argv[0]);
        exit(EXIT_FAILURE);
    }
 }
 static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool skip_empty_lines = false) {
    std::vector<std::string> output;
    std::ifstream file(path);
@ -508,10 +353,10 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
 * Load prompt files and completion file.
 * Then format each pair of prompt + completion to make an entry.
 */
-static int prepare_entries(ctrl_params & cparams) {
+static int prepare_entries(gpt_params & params, train_context & ctx_train) {
    // load prompts
-    std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(cparams.positive_prompts_file);
+    std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(params.cvector_positive_file);
-    std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(cparams.negative_prompts_file);
+    std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(params.cvector_negative_file);
    if (positive_prompts.size() != negative_prompts.size()) {
        fprintf(stderr, "number of positive and negative prompts must be equal\n");
        return 1;
@ -522,7 +367,7 @@ static int prepare_entries(ctrl_params & cparams) {
    }
    // create templated prompts
-    std::vector<std::string> completions = ctrlvec_load_prompt_file(cparams.completions_file, false);
+    std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
    auto format_template = [](std::string persona, std::string suffix) {
        //const std::string user_tag = "[INST]";
        //const std::string asst_tag = "[/INST]";
@ -531,34 +376,28 @@ static int prepare_entries(ctrl_params & cparams) {
        return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
    };
    for (size_t i = 0; i < positive_prompts.size(); ++i) {
-        for (int j = 0; j < std::min((int) completions.size(), cparams.n_completions); ++j) {
+        for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
            // TODO replicate the truncations done by the python implementation
-            cparams.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
+            ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
-            cparams.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
+            ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
        }
    }
    return 0;
 }
 int main(int argc, char ** argv) {
    ctrl_params cparams;
    int skipme = ctrlvec_params_parse(argc, argv, cparams);
    argc -= skipme;
    argv += skipme;
    gpt_params params;
    if (!gpt_params_parse(argc, argv, params)) {
        gpt_params_print_usage(argc, argv, params);
        return 1;
    }
-    if (cparams.n_pca_iterations % cparams.n_pca_batch != 0) {
+    if (params.n_pca_iterations % params.n_pca_batch != 0) {
        fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n");
        return 1;
    }
    // load and prepare entries for training
    prepare_entries(cparams);
    callback_data cb_data;
@ -584,27 +423,30 @@ int main(int argc, char ** argv) {
    char model_hint[128];
    llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
    // init train_context
    train_context ctx_train(n_embd, n_layers);
    // load and prepare entries for training
    prepare_entries(params, ctx_train);
    // we have to pretokenize everything because otherwise we don't know how much overhead to allocate ctx_diffs_wrapped
    std::vector<tokenized_prompt> tokenized_prompts;
    size_t n_total_tokens = 0;
-    for (size_t i = 0; i < cparams.positive_entries.size(); ++i) {
+    for (size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
-        tokenized_prompt t(ctx, cparams.positive_entries[i], cparams.negative_entries[i]);
+        tokenized_prompt t(ctx, ctx_train.positive_entries[i], ctx_train.negative_entries[i]);
        n_total_tokens += 2 * t.max_seq_len;
        tokenized_prompts.push_back(std::move(t));
    }
    std::cout << "n_total_tokens: " << n_total_tokens << std::endl;
-    // init train_context
+    for(size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
    train_context ctx_train(n_embd, n_layers);
    for(size_t i = 0; i < cparams.positive_entries.size(); ++i) {
        tokenized_prompt t = tokenized_prompts[i];
        cb_data.n_layers = n_layers;
        cb_data.n_tokens = t.max_seq_len;
        printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n",
-            (int) i+1, (int) cparams.positive_entries.size(),
+            (int) i+1, (int) ctx_train.positive_entries.size(),
            tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
            tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
            (int) t.max_seq_len);
@ -635,12 +477,12 @@ int main(int argc, char ** argv) {
    // run PCA
    PCA::pca_params pca_params;
    pca_params.n_threads = params.n_threads;
-    pca_params.n_batch = cparams.n_pca_batch;
+    pca_params.n_batch = params.n_pca_batch;
-    pca_params.n_iterations = cparams.n_pca_iterations;
+    pca_params.n_iterations = params.n_pca_iterations;
    PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
    // write output vectors to gguf
-    export_gguf(ctx_train.v_final, cparams.outfile, model_hint);
+    export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
    llama_backend_free();