diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index eb7f05038..d4f619e93 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -20,6 +20,98 @@ struct callback_data {
     std::vector<float *> v_final; // vector of finished vectors of size [n_embd]
 };
 
+struct ctrl_params {
+    std::string outfile  = "control_vector.gguf";
+    std::string positive = "happy"; // TODO support multiple positive prompts
+    std::string negative = "sad";   // TODO support multiple negative prompts
+};
+
+static void print_usage(const char * executable) {
+    printf("\n");
+    printf("usage: %s [options] -m <model> [gpt-opts]", executable);
+    printf("\n");
+    printf("Creates a GGUF control vector for a given model.");
+    printf("\n");
+    printf("options:\n");
+    printf("  -h, --help    show this help message and exit\n");
+    printf("  --outfile     output file (default: 'control_vector.gguf')\n");
+    printf("  --positive    positive prompt (default: 'happy')\n");
+    printf("  --negative    negative prompt (default: 'sad')\n");
+    printf("\n");
+    printf("gpt-opts: other options from main\n");
+    printf("\n");
+}
+
+static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
+    std::string arg;
+    const std::string arg_prefix = "--";
+    int skipme = 0;
+
+    int arg_idx = 1;
+    for (; arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) == 0; ++arg_idx) {
+        arg = argv[arg_idx];
+        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }
+
+        if (arg == "-h" || arg == "--help") {
+            print_usage(argv[0]);
+            exit(0);
+        }
+        if (arg == "--version") {
+            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
+            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+            exit(0);
+        }
+        if (arg == "--outfile") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.outfile = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_parse_params
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+        if (arg == "--positive") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.positive = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_parse_params
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+        if (arg == "--negative") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.negative = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_parse_params
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+
+        // we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
+    }
+    return skipme;
+}
+
+static int ctrlvec_params_parse(int argc, char ** argv, ctrl_params & params) {
+    int skipme = 0;
+    try {
+        skipme = ctrlvec_params_parse_ex(argc, argv, params);
+    }
+    catch (const std::invalid_argument & ex) {
+        fprintf(stderr, "%s\n", ex.what());
+        print_usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    return skipme;
+}
+
 static std::string ggml_ne_string(const ggml_tensor * t) {
     std::string str;
     for (int i = 0; i < GGML_MAX_DIMS; ++i) {
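Because ctrlvec_params_parse_ex stops scanning at the first argument that does not start with "--", and because the skipme offset is later applied to the front of argv before gpt_params_parse runs, the control-vector options only work as a contiguous prefix before -m and the other gpt-params options. A hypothetical invocation (binary name and model path are illustrative, not from the patch):

    ./control-vector-generator --positive "happy" --negative "sad" --outfile happy-sad.gguf -m model.gguf

@@ -192,14 +284,14 @@ static std::vector<float> power_iteration(callback_data & cb_data, const float *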
 
 // TODO translate to ggml
 static void pca(callback_data & cb_data) {
-    for (size_t i = 0; i < cb_data.v_diff.size(); i++) {
+    for (int i = 0; i < cb_data.v_diff.size(); i++) {
         float * matrix = square_diff(cb_data, i);
         std::vector<float> eigenvector = power_iteration(cb_data, matrix);
         cb_data.v_final.push_back(&eigenvector[0]);
         delete[] matrix;
-        // TODO make your print outputs nicer
-        std::cout << "Done with layer " << i << "\n";
+        printf("Done with layer %d\n", i);
     }
+    printf("Done with PCA.\n");
 }
 
 template <typename T>
@@ -209,59 +301,53 @@ static std::string to_string(const T & val) {
     return ss.str();
 }
 
-static void export_gguf(callback_data & cb_data, const std::string fname) {
+static void export_gguf(callback_data & cb_data, const std::string fname, const std::string model_hint) {
     struct gguf_context * ctx = gguf_init_empty();
 
-    gguf_set_val_str(ctx, "general.architecture", "controlvector");
-    gguf_set_val_str(ctx, "controlvector.model_hint", "mistral"); // TODO steal this from the model somehow (arch)
-    gguf_set_val_i32(ctx, "controlvector.layer_count", cb_data.v_final.size());
+    const std::string arch = "controlvector";
+    gguf_set_val_str(ctx, "general.architecture", arch.c_str());
+    gguf_set_val_str(ctx, (arch + ".model_hint").c_str(), model_hint.c_str());
+    gguf_set_val_i32(ctx, (arch + ".layer_count").c_str(), cb_data.v_final.size());
 
-    //size_t buf_size = 3u*cb_data.n_embd*sizeof(float); // TODO how much size do i need???
-    size_t buf_size = 128u*1024u*4096u;
-    std::vector<uint8_t> buf(buf_size);
+    //size_t buf_size = 3u*cb_data.n_embd*sizeof(float); // TODO how much size do i need?
+    size_t buf_size = 128u*1024u*4096u; // FIXME placeholder
 
-    // TODO customize mem size - I have no idea
+    // TODO customize mem size - I have no idea what this is supposed to be
     struct ggml_init_params params = {
         /*.mem_size   =*/ buf_size,
-        /*.mem_buffer =*/ buf.data(),
+        /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ false,
     };
     struct ggml_context * ctx_data = ggml_init(params);
 
-    // TODO direction tensor invalid??? probably because you start at 0. see below
-    for (int i = 0; i < cb_data.v_final.size(); i++) {
-        const std::string name = "direction." + to_string(i+1); // TODO figure out how to get the number for direction - dl repeng locally and debug
-        // clone the repo and use importlib
-        // git clone https://github.com/vgel/repeng.git
+    for (int i = 0; i < cb_data.v_final.size(); ++i) {
+        // TODO this number is probably not right - figure out which layer is which
+        // the python implementation uses a dict to handle this, we don't know if it's 1, 2, 3, 4... or other
+        const std::string name = "direction." + to_string(i+1);
 
         struct ggml_tensor * cur = ggml_new_tensor_1d(ctx_data, GGML_TYPE_F32, cb_data.n_embd);
-        std::cout << "Made it past tensor creation";
         ggml_set_name(cur, name.c_str());
-        std::cout << "Made it past tensor name set";
 
-        // whining about buf != NULL
-        // TODO figure out how to set data
-        //ggml_backend_tensor_set(cur, cb_data.v_final[i], 0, cb_data.n_embd * sizeof(float)); // if this doesn't work refer to gguf.cpp example
+        // TODO figure out how to set data - it's whining about buf != NULL when using the below commented line
+        //ggml_backend_tensor_set(cur, cb_data.v_final[i], 0, cb_data.n_embd * sizeof(float));
         {
            float * data = (float *) cur->data;
            for (int j = 0; j < ggml_nelements(cur); j++) {
                data[j] = cb_data.v_final[i][j];
            }
        }
-        std::cout << "Made it past tensor backend set";
 
         gguf_add_tensor(ctx, cur);
-        std::cout << "Added tensor " << i << "\n";
+        printf("Added tensor %d\n", i);
     }
 
-    std::cout << "Writing file\n";
+    printf("Writing file...\n");
 
     gguf_write_to_file(ctx, fname.c_str(), false);
 
-    printf("%s: wrote file '%s;\n", __func__, fname.c_str());
+    printf("%s: wrote file '%s'\n", __func__, fname.c_str());
 
     ggml_free(ctx_data);
     gguf_free(ctx);
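On the buf_size FIXME above: the context only ever holds v_final.size() one-dimensional F32 tensors of n_embd elements, so the size can likely be computed rather than guessed. A minimal sketch, assuming ggml_tensor_overhead() covers all per-tensor metadata (the helper name is hypothetical, not part of the patch):

    // hypothetical sizing helper: metadata plus data for n_layers
    // one-dimensional F32 tensors of n_embd elements each
    static size_t ctrlvec_ctx_size(size_t n_layers, size_t n_embd) {
        size_t per_tensor = ggml_tensor_overhead()    // tensor struct + padding
                          + n_embd * sizeof(float);   // tensor data (F32)
        return n_layers * per_tensor;
    }

which would replace the 128u*1024u*4096u placeholder with ctrlvec_ctx_size(cb_data.v_final.size(), cb_data.n_embd).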
@@ -270,10 +356,14 @@ static void export_gguf(callback_data & cb_data, const std::string fname) {
 // END NON-GGML IMPLEMENTATION
 
 int main(int argc, char ** argv) {
-    callback_data cb_data;
-    std::string prompt_pos = "happy";
-    std::string prompt_neg = "sad";
+    ctrl_params cparams;
 
+    int skipme = ctrlvec_params_parse(argc, argv, cparams);
+    // FIXME hack to skip the ctrlvec args in parsing gpt params
+    argc -= skipme;
+    argv += skipme;
+
+    callback_data cb_data;
     gpt_params params;
     if (!gpt_params_parse(argc, argv, params)) {
         return 1;
@@ -305,8 +395,17 @@ int main(int argc, char ** argv) {
     }
 
     const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    std::vector<llama_token> tokens_pos = ::llama_tokenize(ctx, prompt_pos, add_bos);
-    std::vector<llama_token> tokens_neg = ::llama_tokenize(ctx, prompt_neg, add_bos);
+
+    /* TODO this just tokenizes the exact pos/neg strings, correct?
+     * instead we want to create a bunch of starter prompts for it to work off
+     * we need to run get_hidden_layers many many times and then figure out how to combine the resulting vectors
+     * see the blogpost + python implementation for reference
+     *
+     * https://vgel.me/posts/representation-engineering/
+     * https://github.com/vgel/repeng/blob/main/repeng/extract.py
+     */
+    std::vector<llama_token> tokens_pos = ::llama_tokenize(ctx, cparams.positive, add_bos);
+    std::vector<llama_token> tokens_neg = ::llama_tokenize(ctx, cparams.negative, add_bos);
     size_t max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
     padding_seq(ctx, tokens_pos, max_seq_len);
     padding_seq(ctx, tokens_neg, max_seq_len);
@@ -325,9 +424,12 @@ int main(int argc, char ** argv) {
     printf("%f %f \n", cb_data.v_diff[0][4096], cb_data.v_diff[0][4096]);
 
     pca(cb_data);
-    // TODO --outfile
-    std::cout << "Done with PCA" << "\n";
-    export_gguf(cb_data, "controlvector.gguf");
+
+    // TODO figure out how to extract this from model - there's no API exposed to get model arch string
+    // we need get_arch_name() from llama.cpp
+    // TODO also has support been implemented for arches other than llama yet? see #5970
+    std::string model_hint = "llama";
+    export_gguf(cb_data, cparams.outfile, model_hint);
 
     //llama_print_timings(ctx);
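On the model_hint TODO: rather than adding a new get_arch_name() API, the architecture string can probably be read from the loaded model's own GGUF metadata, since every model file carries a general.architecture key and llama_model_meta_val_str() is already exposed in llama.h. A sketch of that approach (untested here; keeps "llama" as the fallback):

    // hypothetical replacement for the hard-coded hint: read the arch string
    // from the model's metadata; llama_model_meta_val_str() returns the value
    // length on success and -1 if the key is missing
    std::string model_hint = "llama"; // fallback
    char arch_name[128];
    if (llama_model_meta_val_str(llama_get_model(ctx), "general.architecture",
                                 arch_name, sizeof(arch_name)) >= 0) {
        model_hint = arch_name;
    }
    export_gguf(cb_data, cparams.outfile, model_hint);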