diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index 33da54ec7..70f3668e9 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -19,16 +19,20 @@ struct diff_wrapper {
 
 struct callback_data {
     std::vector<uint8_t> data;
 
+    int n_tokens = 0;
     int n_embd = 0;
     bool is_eval_pos = true;
+
     // each element of the vector correspond to one layer
-    std::vector<float *> v_pos;  // vector of matrices of size [n_embd, n_tokens]
-    std::vector<float *> v_neg;  // vector of matrices of size [n_embd, n_tokens]
-    std::vector<float *> v_final; // vector of finished vectors of size [n_embd]
-    std::vector<float *> v_diff; // vector of matrices of size [n_embd, m] where m is some some sum of concatenated matrices
+    std::vector<float *> v_pos;   // vector of matrices of size [n_embd, n_tokens]
+    std::vector<float *> v_neg;   // vector of matrices of size [n_embd, n_tokens]
+    std::vector<float *> v_final; // vector of finished vectors of size [n_embd]
+    std::vector<float *> v_diff;  // vector of matrices of size [n_embd, m] where m ~ n_tokens * n_completions
+
+    // each element of the outer vector correspond to one layer, each element of the inner vector correspond to one prompt pass
     std::vector<std::vector<diff_wrapper>> v_diffs_wrapped; // vector of compiled diff matrices to be concatenated
 
     ~callback_data() {
         for (auto ptr : v_pos) free(ptr);
         for (auto ptr : v_neg) free(ptr);
@@ -66,10 +70,8 @@ static void print_usage(const char * executable) {
     printf("Creates a GGUF control vector for a given model.");
     printf("\n");
     printf("options:\n");
-    printf("  -h, --help               show this help message and exit\n");
-    printf("  -t, --num-threads        number of threads to use (do not confuse with gpt-opts -t)\n");
-    printf("                           default: 8\n");
-    printf("  -o, --outfile            output file\n");
+    printf("  -h, --help               show this help message and exit\n");
+    printf("  -o, --outfile            output file\n");
     printf("                           default: 'control_vector.gguf'\n");
     printf("  -pf, --positive-file     positive prompts file, one prompt per line\n");
     printf("                           default: 'examples/control-vector-generator/positive.txt'\n");
@@ -77,9 +79,11 @@ static void print_usage(const char * executable) {
     printf("                           default: 'examples/control-vector-generator/negative.txt'\n");
     printf("  -cf, --completions-file  completions file\n");
     printf("                           default: 'examples/control-vector-generator/completions.txt'\n");
-    printf("  -nc, --num-completions   number of lines of completions file to use\n");
+    printf("  -nc, --num-completions N number of lines of completions file to use\n");
     printf("                           default: 64\n");
-    printf("  --always-reload          reload the model for every new template to parse\n");
+    printf("  -t, --num-threads N      number of threads to use (do not confuse with gpt-opts -t)\n");
+    printf("                           default: 8\n");
+    printf("  --always-reload          reload the model for every new template to parse\n");
     printf("\n");
     printf("gpt-opts:\n");
     printf("  other options from main\n");
@@ -88,7 +92,7 @@
 
 static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
     std::string arg;
-    const std::string arg_prefix = "--";
+    const std::string arg_prefix = "-";
 
     // hack to skip ctrlvec args in gpt_parse_params but we'll leave it as is
     int skipme = 0;
@@ -357,6 +361,7 @@ static void calc_diff(callback_data & cb_data) {
     }
 }
 
+// TODO do we want to multithread this? it takes very little time as it is
 static void concatenate_diffs(callback_data & cb_data) {
     for (size_t i = 0; i < cb_data.v_diffs_wrapped.size(); ++i) {
         std::vector<diff_wrapper> & vec = cb_data.v_diffs_wrapped[i];
@@ -398,7 +403,7 @@ static float* square_diff(callback_data & cb_data, size_t idx) {
     return result;
 }
 
-// TODO translate to ggml
+// TODO translate to ggml (this is a built-in function in ggml)
 static void normalize_inplace(std::vector<float> & vec) {
     // inefficient(?) norm computation
     float norm = 0.0f;
@@ -412,7 +417,7 @@ static void normalize_inplace(std::vector<float> & vec) {
     }
 }
 
-// TODO translate to ggml
+// TODO translate to ggml (this is a built-in function in ggml)
 static std::vector<float> mul_mat(const float * mat, const std::vector<float> & vec, size_t dim) {
     std::vector<float> result(dim, 0.0f);
     for (size_t i = 0; i < dim; ++i) {
@@ -459,7 +464,7 @@ static std::vector<float> power_iteration(callback_data & cb_data, const float *
 
 // TODO translate to ggml
 static void pca(callback_data & cb_data, size_t n_threads) {
-    int n_layers = cb_data.v_diff.size();
+    int n_layers = cb_data.v_diff.size();
     std::vector<std::thread> threads;
     cb_data.v_final.reserve(n_layers);
     auto worker_function = [&](int worker_id) {
@@ -577,6 +582,7 @@ int main(int argc, char ** argv) {
 
     int n_layers = llama_n_layer(model);
    int n_embd = llama_n_embd(model);
+    cb_data.n_embd = n_embd;
     int n_prompts = cparams.positive_prompts.size();
 
     // vector of finished vectors of size [n_embd], we have (n_layers - 1) vectors in total
@@ -605,7 +611,6 @@ int main(int argc, char ** argv) {
         padding_seq(ctx, tokens_pos, max_seq_len);
         padding_seq(ctx, tokens_neg, max_seq_len);
         cb_data.n_tokens = max_seq_len;
-        cb_data.n_embd = n_embd;
 
         // need to reload the model so it doesn't run out of context
         // this should scale with -c option passed by main
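
The "// TODO translate to ggml (this is a built-in function in ggml)" markers above point at ops ggml already provides, ggml_mul_mat in particular. Below is a minimal sketch of what the scalar mul_mat() helper could become as a one-shot CPU ggml graph; the helper name mul_mat_ggml and the context sizing are illustrative and not part of this patch.

// Sketch only, not in the PR: the scalar mul_mat() above expressed with
// ggml's built-in ggml_mul_mat. `mat` is row-major [dim, dim], `vec` has
// length `dim`, and out[i] = dot(row i of mat, vec), matching the scalar loop.
#include "ggml.h"
#include <cstring>
#include <vector>

static std::vector<float> mul_mat_ggml(const float * mat, const std::vector<float> & vec, size_t dim, int n_threads) {
    // enough memory for the two inputs, the result, and graph bookkeeping
    struct ggml_init_params iparams = {
        /*.mem_size   =*/ (dim * dim + 2 * dim) * sizeof(float)
                          + 8 * ggml_tensor_overhead() + ggml_graph_overhead() + 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(iparams);

    // a: [dim, dim] matrix, b: [dim] vector; ggml_mul_mat contracts over ne[0]
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, dim);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim);
    memcpy(a->data, mat,        ggml_nbytes(a));
    memcpy(b->data, vec.data(), ggml_nbytes(b));

    struct ggml_tensor * out = ggml_mul_mat(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    ggml_graph_compute_with_ctx(ctx, gf, n_threads);

    std::vector<float> result(dim);
    memcpy(result.data(), out->data, ggml_nbytes(out));
    ggml_free(ctx);
    return result;
}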