move param parser to common

This commit is contained in:
ngxson 2024-06-12 15:58:20 +02:00
parent f54cb8e307
commit 679f5137f8
4 changed files with 99 additions and 190 deletions

View file

@ -1576,6 +1576,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
return true; return true;
} }
params.out_file = argv[i]; params.out_file = argv[i];
params.cvector_outfile = argv[i];
return true; return true;
} }
if (arg == "-ofreq" || arg == "--output-frequency") { if (arg == "-ofreq" || arg == "--output-frequency") {
@ -1610,6 +1611,55 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.i_chunk = std::stoi(argv[i]); params.i_chunk = std::stoi(argv[i]);
return true; return true;
} }
// control-vector-generator params
if (arg == "--completions-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_completions_file = argv[i];
return true;
}
if (arg == "--positive-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_positive_file = argv[i];
return true;
}
if (arg == "--negative-file") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.cvector_negative_file = argv[i];
return true;
}
if (arg == "--num-completions") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_completions = std::stoi(argv[i]);
return true;
}
if (arg == "--pca-batch") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_pca_batch = std::stoi(argv[i]);
return true;
}
if (arg == "--pca-iter") {
if (++i >= argc) {
invalid_param = true;
return true;
}
params.n_pca_iterations = std::stoi(argv[i]);
return true;
}
#ifndef LOG_DISABLE_LOGS #ifndef LOG_DISABLE_LOGS
// Parse args for logging parameters // Parse args for logging parameters
if (log_param_single_parse(argv[i])) { if (log_param_single_parse(argv[i])) {
@ -1931,6 +1981,15 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "logging", " --log-append", "Don't truncate the old log file." }); options.push_back({ "logging", " --log-append", "Don't truncate the old log file." });
#endif // LOG_DISABLE_LOGS #endif // LOG_DISABLE_LOGS
// Help entries for the control-vector-generator example.
// The flag spellings listed here must be the ones gpt_params_find_arg actually matches
// ("--pca-batch" / "--pca-iter"); advertising any other spelling sends users to an unknown-argument error.
options.push_back({ "control-vector-generator" });
options.push_back({ "control-vector-generator", "-o, --output FNAME", "output file (default: '%s')", params.cvector_outfile.c_str() });
options.push_back({ "control-vector-generator", "--positive-file FNAME", "positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str() });
options.push_back({ "control-vector-generator", "--negative-file FNAME", "negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str() });
options.push_back({ "control-vector-generator", "--completions-file FNAME", "completions file (default: '%s')", params.cvector_completions_file.c_str() });
options.push_back({ "control-vector-generator", "--num-completions N", "number of lines of completions file to use (default: %d)", params.n_completions });
options.push_back({ "control-vector-generator", "--pca-batch N", "batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch });
options.push_back({ "control-vector-generator", "--pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
printf("usage: %s [options]\n", argv[0]); printf("usage: %s [options]\n", argv[0]);
for (const auto & o : options) { for (const auto & o : options) {

View file

@ -232,6 +232,15 @@ struct gpt_params {
bool process_output = false; // collect data for the output tensor bool process_output = false; // collect data for the output tensor
bool compute_ppl = true; // whether to compute perplexity bool compute_ppl = true; // whether to compute perplexity
// control-vector-generator params
int n_completions = 64;
int n_pca_batch = 20;
int n_pca_iterations = 1000;
std::string cvector_outfile = "control_vector.gguf";
std::string cvector_completions_file = "examples/control-vector-generator/completions.txt";
std::string cvector_positive_file = "examples/control-vector-generator/positive.txt";
std::string cvector_negative_file = "examples/control-vector-generator/negative.txt";
}; };
void gpt_params_handle_model_default(gpt_params & params); void gpt_params_handle_model_default(gpt_params & params);

View file

@ -5,7 +5,7 @@ This example demonstrates how to generate a control vector using gguf models.
Related PRs: Related PRs:
- [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970) - [Add support for control vectors](https://github.com/ggerganov/llama.cpp/pull/5970)
- (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880) - (Issue) [Generate control vector using llama.cpp](https://github.com/ggerganov/llama.cpp/issues/6880)
- [Add control-vector-generator](https://github.com/ggerganov/llama.cpp/pull/7514) - [Add control-vector-generator example](https://github.com/ggerganov/llama.cpp/pull/7514)
Example: Example:
@ -14,13 +14,12 @@ Example:
./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf ./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
# With GPU # With GPU
./control-vector-generator --num-completions 2 --pca-iter 40 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 ./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
# With advanced options # With advanced options
# Please note that the ORDER of arguments does matter ./control-vector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --num-completions 128 --pca-iter 2000 --pca-batch 100
# example-related options (i.e., --num-completions, --pca-iter) always come before model options (i.e., -m, -ngl)
./control-vector-generator --num-completions 128 --pca-iter 2000 --pca-batch 100 -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
# To see help message # To see help message
./control-vector-generator -h ./control-vector-generator -h
# Then, have a look at "control-vector-generator" section
``` ```

View file

@ -168,6 +168,10 @@ struct train_context {
int n_embd; int n_embd;
int n_layers; int n_layers;
/* pair of prompts to be used for generating final vector */
std::vector<std::string> positive_entries;
std::vector<std::string> negative_entries;
// each element of the vector correspond to one layer // each element of the vector correspond to one layer
// NOTE: the last layer is discard. therefore, we will have (n_layers - 1) elements here // NOTE: the last layer is discard. therefore, we will have (n_layers - 1) elements here
// NOTE (2): v_diff is transposed from v_diff_tmp // NOTE (2): v_diff is transposed from v_diff_tmp
@ -243,23 +247,6 @@ struct train_context {
} }
}; };
// Settings consumed by the control-vector generator, together with the
// prompt pairs that are built from the input files.
struct ctrl_params {
    // tuning knobs (defaults)
    int n_completions{64};
    int n_pca_batch{20};
    int n_pca_iterations{1000};

    // input/output locations (defaults)
    std::string outfile{"control_vector.gguf"};
    std::string completions_file{"examples/control-vector-generator/completions.txt"};
    std::string positive_prompts_file{"examples/control-vector-generator/positive.txt"};
    std::string negative_prompts_file{"examples/control-vector-generator/negative.txt"};

    // positive/negative prompt pairs used to generate the final vector;
    // both start out empty
    std::vector<std::string> positive_entries;
    std::vector<std::string> negative_entries;
};
struct tokenized_prompt { struct tokenized_prompt {
std::vector<llama_token> tokens_pos; std::vector<llama_token> tokens_pos;
std::vector<llama_token> tokens_neg; std::vector<llama_token> tokens_neg;
@ -293,148 +280,6 @@ static std::string to_string(const T & val) {
return ss.str(); return ss.str();
} }
/**
 * Print the command-line help of the control-vector generator to stdout.
 *
 * Default values shown in the help are read from a default-constructed
 * ctrl_params, so they cannot drift from the struct's initializers.
 *
 * The PCA options are listed as "--pca-batch" / "--pca-iter" because those are
 * the spellings ctrlvec_params_parse_ex matches.
 *
 * @param executable program name to show in the usage line (normally argv[0])
 */
static void print_usage(const char * executable) {
    struct ctrl_params defaults;

    printf("\n");
    printf("usage: %s [options] -m <model> [gpt-opts]", executable);
    printf("\n");
    printf("Creates a GGUF control vector for a given model.");
    printf("\n");

    printf("options:\n");
    printf(" -h, --help show this help message and exit\n");
    printf(" -o, --outfile FNAME output file\n");
    printf(" default: %s\n", defaults.outfile.c_str());
    printf(" -pf, --positive-file FNAME positive prompts file, one prompt per line\n");
    printf(" default: %s\n", defaults.positive_prompts_file.c_str());
    printf(" -nf, --negative-file FNAME negative prompts file, one prompt per line\n");
    printf(" default: %s\n", defaults.negative_prompts_file.c_str());
    printf(" -cf, --completions-file completions file\n");
    printf(" default: %s\n", defaults.completions_file.c_str());
    printf(" -nc, --num-completions N number of lines of completions file to use\n");
    printf(" default: %d\n", defaults.n_completions);
    printf(" --pca-batch N batch size used for PCA. Larger batch runs faster, but uses more memory\n");
    printf(" default: %d\n", defaults.n_pca_batch);
    printf(" --pca-iter N number of iterations used for PCA\n");
    printf(" default: %d\n", defaults.n_pca_iterations);
    printf("\n");

    printf("gpt-opts:\n");
    printf(" -m, --model FNAME path to model file\n");
    printf(" -ngl, --gpu-layers N number of layers to offload to GPU\n");
    printf(" ...other options from main\n");
    printf("\n");
}
/**
 * Scan the command line for control-vector-generator options and store their
 * values in `params`.
 *
 * Arguments that are not recognised here are ignored; the caller passes the
 * remaining command line on to the common gpt parameter parser.
 * "-h"/"--help" and "--version" print their output and terminate the process.
 *
 * @param argc   number of entries in argv
 * @param argv   raw command-line arguments (argv[0] is the executable name)
 * @param params receives the parsed values; untouched fields keep their defaults
 * @return number of argv entries consumed by options handled here (two per
 *         option/value pair), which the caller uses to skip them
 * @throws std::invalid_argument if an option has no value, its value looks like
 *         another option (starts with "-"), or a numeric value cannot be parsed
 *         or is out of range for int
 */
static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
    const std::string arg_prefix = "-";
    // hack to skip ctrlvec args in gpt_parse_params but we'll leave it as is
    int skipme = 0;

    for (int arg_idx = 1; arg_idx < argc; ++arg_idx) {
        std::string arg = argv[arg_idx];
        // options may be written with '_' instead of '-'
        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
            std::replace(arg.begin(), arg.end(), '_', '-');
        }

        // Advance to the value that follows the current option and return it.
        // The comparison length is the prefix length: comparing more characters
        // would include the prefix's terminating NUL and only ever reject the
        // exact string "-", letting "-m" and friends be swallowed as values.
        auto take_value = [&]() -> const char * {
            const bool has_next = ++arg_idx < argc;
            if (!has_next || strncmp(argv[arg_idx], arg_prefix.c_str(), arg_prefix.size()) == 0) {
                throw std::invalid_argument("error: missing argument for " + arg);
            }
            skipme += 2;
            return argv[arg_idx];
        };

        // Same as take_value(), but converts the value to int. std::stoi reports
        // failures through two exception types; both are mapped to
        // std::invalid_argument, the only type this function documents.
        auto take_int = [&]() -> int {
            const char * value = take_value();
            try {
                return std::stoi(value);
            } catch (const std::invalid_argument &) {
                throw std::invalid_argument("error: invalid argument for " + arg);
            } catch (const std::out_of_range &) {
                throw std::invalid_argument("error: invalid argument for " + arg);
            }
        };

        if (arg == "-h" || arg == "--help") {
            print_usage(argv[0]);
            exit(0);
        }
        if (arg == "--version") {
            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
            exit(0);
        }

        if (arg == "--outfile" || arg == "-o") {
            params.outfile = take_value();
        } else if (arg == "--completions-file" || arg == "-cf") {
            params.completions_file = take_value();
        } else if (arg == "--positive-file" || arg == "-pf") {
            params.positive_prompts_file = take_value();
        } else if (arg == "--negative-file" || arg == "-nf") {
            params.negative_prompts_file = take_value();
        } else if (arg == "--num-completions" || arg == "-nc") {
            params.n_completions = take_int();
        } else if (arg == "--pca-batch") {
            params.n_pca_batch = take_int();
        } else if (arg == "--pca-iter") {
            params.n_pca_iterations = take_int();
        }
        // TODO it might be nice QoL to have single positive/negative args
        // we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
    }
    return skipme;
}
/**
 * Wrapper around ctrlvec_params_parse_ex for use from main().
 *
 * On success the count returned by ctrlvec_params_parse_ex is passed through
 * unchanged. If parsing throws std::invalid_argument, the message is written
 * to stderr, the usage text is printed, and the process exits with
 * EXIT_FAILURE.
 */
static int ctrlvec_params_parse(int argc, char ** argv, ctrl_params & params) {
    try {
        return ctrlvec_params_parse_ex(argc, argv, params);
    } catch (const std::invalid_argument & err) {
        fprintf(stderr, "%s\n", err.what());
        print_usage(argv[0]);
        exit(EXIT_FAILURE);
    }
}
static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool skip_empty_lines = false) { static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool skip_empty_lines = false) {
std::vector<std::string> output; std::vector<std::string> output;
std::ifstream file(path); std::ifstream file(path);
@ -508,10 +353,10 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
* Load prompt files and completion file. * Load prompt files and completion file.
* Then format each pair of prompt + completion to make an entry. * Then format each pair of prompt + completion to make an entry.
*/ */
static int prepare_entries(ctrl_params & cparams) { static int prepare_entries(gpt_params & params, train_context & ctx_train) {
// load prompts // load prompts
std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(cparams.positive_prompts_file); std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(params.cvector_positive_file);
std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(cparams.negative_prompts_file); std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(params.cvector_negative_file);
if (positive_prompts.size() != negative_prompts.size()) { if (positive_prompts.size() != negative_prompts.size()) {
fprintf(stderr, "number of positive and negative prompts must be equal\n"); fprintf(stderr, "number of positive and negative prompts must be equal\n");
return 1; return 1;
@ -522,7 +367,7 @@ static int prepare_entries(ctrl_params & cparams) {
} }
// create templated prompts // create templated prompts
std::vector<std::string> completions = ctrlvec_load_prompt_file(cparams.completions_file, false); std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
auto format_template = [](std::string persona, std::string suffix) { auto format_template = [](std::string persona, std::string suffix) {
//const std::string user_tag = "[INST]"; //const std::string user_tag = "[INST]";
//const std::string asst_tag = "[/INST]"; //const std::string asst_tag = "[/INST]";
@ -531,34 +376,28 @@ static int prepare_entries(ctrl_params & cparams) {
return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]" return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
}; };
for (size_t i = 0; i < positive_prompts.size(); ++i) { for (size_t i = 0; i < positive_prompts.size(); ++i) {
for (int j = 0; j < std::min((int) completions.size(), cparams.n_completions); ++j) { for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
// TODO replicate the truncations done by the python implementation // TODO replicate the truncations done by the python implementation
cparams.positive_entries.push_back(format_template(positive_prompts[i], completions[j])); ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
cparams.negative_entries.push_back(format_template(negative_prompts[i], completions[j])); ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
} }
} }
return 0; return 0;
} }
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
ctrl_params cparams;
int skipme = ctrlvec_params_parse(argc, argv, cparams);
argc -= skipme;
argv += skipme;
gpt_params params; gpt_params params;
if (!gpt_params_parse(argc, argv, params)) { if (!gpt_params_parse(argc, argv, params)) {
gpt_params_print_usage(argc, argv, params);
return 1; return 1;
} }
if (cparams.n_pca_iterations % cparams.n_pca_batch != 0) { if (params.n_pca_iterations % params.n_pca_batch != 0) {
fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n"); fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n");
return 1; return 1;
} }
// load and prepare entries for training
prepare_entries(cparams);
callback_data cb_data; callback_data cb_data;
@ -584,27 +423,30 @@ int main(int argc, char ** argv) {
char model_hint[128]; char model_hint[128];
llama_model_meta_val_str(model, "general.architecture", model_hint, 128); llama_model_meta_val_str(model, "general.architecture", model_hint, 128);
// init train_context
train_context ctx_train(n_embd, n_layers);
// load and prepare entries for training
prepare_entries(params, ctx_train);
// we have to pretokenize everything because otherwise we don't know how much overhead to allocate ctx_diffs_wrapped // we have to pretokenize everything because otherwise we don't know how much overhead to allocate ctx_diffs_wrapped
std::vector<tokenized_prompt> tokenized_prompts; std::vector<tokenized_prompt> tokenized_prompts;
size_t n_total_tokens = 0; size_t n_total_tokens = 0;
for (size_t i = 0; i < cparams.positive_entries.size(); ++i) { for (size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
tokenized_prompt t(ctx, cparams.positive_entries[i], cparams.negative_entries[i]); tokenized_prompt t(ctx, ctx_train.positive_entries[i], ctx_train.negative_entries[i]);
n_total_tokens += 2 * t.max_seq_len; n_total_tokens += 2 * t.max_seq_len;
tokenized_prompts.push_back(std::move(t)); tokenized_prompts.push_back(std::move(t));
} }
std::cout << "n_total_tokens: " << n_total_tokens << std::endl; std::cout << "n_total_tokens: " << n_total_tokens << std::endl;
// init train_context for(size_t i = 0; i < ctx_train.positive_entries.size(); ++i) {
train_context ctx_train(n_embd, n_layers);
for(size_t i = 0; i < cparams.positive_entries.size(); ++i) {
tokenized_prompt t = tokenized_prompts[i]; tokenized_prompt t = tokenized_prompts[i];
cb_data.n_layers = n_layers; cb_data.n_layers = n_layers;
cb_data.n_tokens = t.max_seq_len; cb_data.n_tokens = t.max_seq_len;
printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n", printf("Evaluating prompt[%d/%d]: \"%s\" - \"%s\" (%d tokens)\n",
(int) i+1, (int) cparams.positive_entries.size(), (int) i+1, (int) ctx_train.positive_entries.size(),
tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(), tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(), tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
(int) t.max_seq_len); (int) t.max_seq_len);
@ -635,12 +477,12 @@ int main(int argc, char ** argv) {
// run PCA // run PCA
PCA::pca_params pca_params; PCA::pca_params pca_params;
pca_params.n_threads = params.n_threads; pca_params.n_threads = params.n_threads;
pca_params.n_batch = cparams.n_pca_batch; pca_params.n_batch = params.n_pca_batch;
pca_params.n_iterations = cparams.n_pca_iterations; pca_params.n_iterations = params.n_pca_iterations;
PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final); PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
// write output vectors to gguf // write output vectors to gguf
export_gguf(ctx_train.v_final, cparams.outfile, model_hint); export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
llama_backend_free(); llama_backend_free();