param parsing, refactor, comments

Added basic command-line parameters for the output file and one positive and one negative prompt.

Refactored some messy code in PCA computation and GGUF exporting.

Left a bunch of comments regarding further work needed.
Christian Zhou-Zheng 2024-05-30 11:31:45 -04:00
parent 73747fe8eb
commit f58f6af133

@@ -20,6 +20,98 @@ struct callback_data {
     std::vector<float *> v_final; // vector of finished vectors of size [n_embd]
 };
 
+struct ctrl_params {
+    std::string outfile  = "control_vector.gguf";
+    std::string positive = "happy"; // TODO support multiple positive prompts
+    std::string negative = "sad";   // TODO support multiple negative prompts
+};
+
+static void print_usage(const char * executable) {
+    printf("\n");
+    printf("usage: %s [options] -m <model> [gpt-opts]", executable);
+    printf("\n");
+    printf("Creates a GGUF control vector for a given model.");
+    printf("\n");
+    printf("options:\n");
+    printf("  -h, --help  show this help message and exit\n");
+    printf("  --outfile   output file (default: 'control_vector.gguf')\n");
+    printf("  --positive  positive prompt (default: 'happy')\n");
+    printf("  --negative  negative prompt (default: 'sad')\n");
+    printf("\n");
+    printf("gpt-opts: other options from main\n");
+    printf("\n");
+}
+
+static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params) {
+    std::string arg;
+    const std::string arg_prefix = "--";
+    int skipme = 0;
+
+    // NOTE: the ctrlvec options must come before the gpt options, because the
+    // skipme offset returned here is later used to shift them out of argv;
+    // accept any leading '-' here so that plain -h is reachable too
+    int arg_idx = 1;
+    for (; arg_idx < argc && argv[arg_idx][0] == '-'; ++arg_idx) {
+        arg = argv[arg_idx];
+        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }
+
+        if (arg == "-h" || arg == "--help") {
+            print_usage(argv[0]);
+            exit(0);
+        }
+        if (arg == "--version") {
+            fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT);
+            fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
+            exit(0);
+        }
+        if (arg == "--outfile") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.outfile = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_params_parse
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+        if (arg == "--positive") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.positive = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_params_parse
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+        if (arg == "--negative") {
+            if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+                params.negative = argv[arg_idx];
+                // FIXME hack to skip these args in gpt_params_parse
+                skipme += 2;
+            }
+            else {
+                throw std::invalid_argument("error: missing argument for " + arg);
+            }
+        }
+        // we do not handle any other unknown arguments here because they will be handled by gpt_params_parse
+    }
+    return skipme;
+}
+
+static int ctrlvec_params_parse(int argc, char ** argv, ctrl_params & params) {
+    int skipme = 0;
+    try {
+        skipme = ctrlvec_params_parse_ex(argc, argv, params);
+    }
+    catch (const std::invalid_argument & ex) {
+        fprintf(stderr, "%s\n", ex.what());
+        print_usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    return skipme;
+}
+
 static std::string ggml_ne_string(const ggml_tensor * t) {
     std::string str;
     for (int i = 0; i < GGML_MAX_DIMS; ++i) {
@@ -192,14 +284,14 @@ static std::vector<float> power_iteration(callback_data & cb_data, const float *
 // TODO translate to ggml
 static void pca(callback_data & cb_data) {
-    for (size_t i = 0; i < cb_data.v_diff.size(); i++) {
+    for (int i = 0; i < cb_data.v_diff.size(); i++) {
         float* matrix = square_diff(cb_data, i);
         std::vector<float> eigenvector = power_iteration(cb_data, matrix);
+        // FIXME eigenvector is local to this iteration, so this stores a dangling pointer - the data must be copied out instead
         cb_data.v_final.push_back(&eigenvector[0]);
         delete[] matrix;
         // TODO make your print outputs nicer
-        std::cout << "Done with layer " << i << "\n";
+        printf("Done with layer %d\n", i);
     }
     printf("Done with PCA.\n");
 }
 
 template <typename T>
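
For reference while translating to ggml: power_iteration (elided from this hunk) is the classic dominant-eigenvector routine. Below is a minimal self-contained sketch of the textbook method - the function name, the row-major n x n matrix layout, and the fixed iteration count are illustrative assumptions, not the exact signature used in this file.

#include <cmath>
#include <vector>

// repeatedly multiply a vector by A and renormalize; this converges to the
// dominant eigenvector of A (here A would be the n_embd x n_embd matrix
// produced by square_diff for one layer)
static std::vector<float> power_iteration_sketch(const float * A, int n, int iters = 100) {
    std::vector<float> b(n, 1.0f);
    for (int it = 0; it < iters; ++it) {
        std::vector<float> b_next(n, 0.0f);
        for (int r = 0; r < n; ++r) {
            for (int c = 0; c < n; ++c) {
                b_next[r] += A[r * n + c] * b[c];
            }
        }
        float norm = 0.0f;
        for (int r = 0; r < n; ++r) {
            norm += b_next[r] * b_next[r];
        }
        norm = std::sqrt(norm);
        for (int r = 0; r < n; ++r) {
            b[r] = b_next[r] / norm;
        }
    }
    return b;
}

A convergence check on how much b changes per step could replace the fixed iteration count.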
@@ -209,59 +301,53 @@ static std::string to_string(const T & val) {
     return ss.str();
 }
 
-static void export_gguf(callback_data & cb_data, const std::string fname) {
+static void export_gguf(callback_data & cb_data, const std::string fname, const std::string model_hint) {
     struct gguf_context * ctx = gguf_init_empty();
 
-    gguf_set_val_str(ctx, "general.architecture", "controlvector");
-    gguf_set_val_str(ctx, "controlvector.model_hint", "mistral"); // TODO steal this from the model somehow (arch)
-    gguf_set_val_i32(ctx, "controlvector.layer_count", cb_data.v_final.size());
+    const std::string arch = "controlvector";
+    gguf_set_val_str(ctx, "general.architecture", arch.c_str());
+    gguf_set_val_str(ctx, (arch + ".model_hint").c_str(), model_hint.c_str());
+    gguf_set_val_i32(ctx, (arch + ".layer_count").c_str(), cb_data.v_final.size());
 
-    //size_t buf_size = 3u*cb_data.n_embd*sizeof(float); // TODO how much size do i need???
-    size_t buf_size = 128u*1024u*4096u;
-    std::vector<uint8_t> buf(buf_size);
+    //size_t buf_size = 3u*cb_data.n_embd*sizeof(float); // TODO how much size do i need?
+    size_t buf_size = 128u*1024u*4096u; // FIXME placeholder
 
-    // TODO customize mem size - I have no idea
+    // TODO customize mem size - I have no idea what this is supposed to be
     struct ggml_init_params params = {
         /*.mem_size   =*/ buf_size,
-        /*.mem_buffer =*/ buf.data(),
+        /*.mem_buffer =*/ NULL,
         /*.no_alloc   =*/ false,
     };
 
     struct ggml_context * ctx_data = ggml_init(params);
 
-    // TODO direction tensor invalid??? probably because you start at 0. see below
-    for (int i = 0; i < cb_data.v_final.size(); i++) {
-        const std::string name = "direction." + to_string(i+1); // TODO figure out how to get the number for direction - dl repeng locally and debug
-        // clone the repo and use importlib
-        // git clone https://github.com/vgel/repeng.git
+    for (int i = 0; i < cb_data.v_final.size(); ++i) {
+        // TODO this number is probably not right - figure out which layer is which
+        // the python implementation uses a dict to handle this; we don't know if it's 1, 2, 3, 4... or something else
+        const std::string name = "direction." + to_string(i+1);
 
         struct ggml_tensor * cur = ggml_new_tensor_1d(ctx_data, GGML_TYPE_F32, cb_data.n_embd);
-        std::cout << "Made it past tensor creation";
         ggml_set_name(cur, name.c_str());
-        std::cout << "Made it past tensor name set";
 
-        // whining about buf != NULL
-        // TODO figure out how to set data
-        //ggml_backend_tensor_set(cur, cb_data.v_final[i], 0, cb_data.n_embd * sizeof(float)); // if this doesn't work refer to gguf.cpp example
+        // TODO figure out how to set data - it asserts buf != NULL when using the commented line below
+        //ggml_backend_tensor_set(cur, cb_data.v_final[i], 0, cb_data.n_embd * sizeof(float));
        {
            float * data = (float *) cur->data;
            for (int j = 0; j < ggml_nelements(cur); j++) {
                data[j] = cb_data.v_final[i][j];
            }
        }
-        std::cout << "Made it past tensor backend set";
 
         gguf_add_tensor(ctx, cur);
-        std::cout << "Added tensor " << i << "\n";
+        printf("Added tensor %d\n", i);
     }
 
-    std::cout << "Writing file\n";
+    printf("Writing file...\n");
 
     gguf_write_to_file(ctx, fname.c_str(), false);
-    printf("%s: wrote file '%s;\n", __func__, fname.c_str());
+    printf("%s: wrote file '%s'\n", __func__, fname.c_str());
 
     ggml_free(ctx_data);
     gguf_free(ctx);
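
One way the mem_size TODO above could be answered, assuming ctx_data only ever holds the direction tensors: derive the size from ggml's per-tensor overhead instead of the hard-coded 512 MiB placeholder. A sketch, using the public ggml_tensor_overhead() helper:

// one F32 tensor of n_embd floats per layer, plus ggml's fixed per-tensor
// overhead; a little extra slack covers ggml's internal alignment padding
size_t buf_size = cb_data.v_final.size()
                * (ggml_tensor_overhead() + cb_data.n_embd * sizeof(float) + 64);

struct ggml_init_params params = {
    /*.mem_size   =*/ buf_size,
    /*.mem_buffer =*/ NULL,
    /*.no_alloc   =*/ false,
};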
@@ -270,10 +356,14 @@ static void export_gguf(callback_data & cb_data, const std::string fname) {
 // END NON-GGML IMPLEMENTATION
 
 int main(int argc, char ** argv) {
-    callback_data cb_data;
-    std::string prompt_pos = "happy";
-    std::string prompt_neg = "sad";
+    ctrl_params cparams;
+
+    int skipme = ctrlvec_params_parse(argc, argv, cparams);
+    // FIXME hack to skip the ctrlvec args in parsing gpt params
+    argc -= skipme;
+    argv += skipme;
+
+    callback_data cb_data;
 
     gpt_params params;
     if (!gpt_params_parse(argc, argv, params)) {
         return 1;
@@ -305,8 +395,17 @@ int main(int argc, char ** argv) {
     }
 
     const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
 
-    std::vector<llama_token> tokens_pos = ::llama_tokenize(ctx, prompt_pos, add_bos);
-    std::vector<llama_token> tokens_neg = ::llama_tokenize(ctx, prompt_neg, add_bos);
+    /* TODO this just tokenizes the exact pos/neg strings, correct?
+     * instead we want to create a bunch of starter prompts for it to work off,
+     * run get_hidden_layers many times, and then figure out how to combine the
+     * resulting vectors (see the sketch after this hunk)
+     * refer to the blog post + python implementation:
+     *
+     * https://vgel.me/posts/representation-engineering/
+     * https://github.com/vgel/repeng/blob/main/repeng/extract.py
+     */
+    std::vector<llama_token> tokens_pos = ::llama_tokenize(ctx, cparams.positive, add_bos);
+    std::vector<llama_token> tokens_neg = ::llama_tokenize(ctx, cparams.negative, add_bos);
+
     size_t max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
     padding_seq(ctx, tokens_pos, max_seq_len);
     padding_seq(ctx, tokens_neg, max_seq_len);
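
On the starter-prompt TODO in the comment above: the linked python implementation builds many contrastive pairs by splicing the positive and negative personas into a shared set of prompt templates, then derives the direction from the per-layer hidden-state differences across all pairs. A hypothetical sketch of the pair generation - the template strings and helper name are invented for illustration:

#include <string>
#include <utility>
#include <vector>

// splice each persona into shared templates to get (positive, negative)
// prompt pairs, in the spirit of repeng's extract.py
static std::vector<std::pair<std::string, std::string>> build_prompt_pairs(
        const std::string & positive, const std::string & negative) {
    static const std::string placeholder = "[persona]";
    static const std::vector<std::string> templates = {
        "Act as if you are an extremely [persona] person.",
        "You are feeling very [persona] today. Describe your morning.",
        "Write a story from the point of view of someone [persona].",
    };
    std::vector<std::pair<std::string, std::string>> pairs;
    for (const std::string & t : templates) {
        const size_t at = t.find(placeholder);
        std::string pos = t;
        std::string neg = t;
        pos.replace(at, placeholder.size(), positive);
        neg.replace(at, placeholder.size(), negative);
        pairs.emplace_back(pos, neg);
    }
    return pairs;
}

Each pair would then go through tokenization, padding, and get_hidden_layers exactly like the single pair does now, with the differences collected into cb_data.v_diff before pca() runs.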
@@ -325,9 +424,12 @@ int main(int argc, char ** argv) {
     printf("%f %f \n", cb_data.v_diff[0][4096], cb_data.v_diff[0][4096]);
 
     pca(cb_data);
-    // TODO --outfile
-    std::cout << "Done with PCA" << "\n";
-    export_gguf(cb_data, "controlvector.gguf");
+
+    // TODO figure out how to extract this from the model - there's no API exposed to get the model arch string
+    // we would need get_arch_name() from llama.cpp
+    // TODO also, has support been implemented for arches other than llama yet? see #5970
+    std::string model_hint = "llama";
+    export_gguf(cb_data, cparams.outfile, model_hint);
 
     //llama_print_timings(ctx);
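
On the model_hint TODO above: a dedicated get_arch_name() export may not be needed, since llama.h already exposes the model's GGUF metadata through llama_model_meta_val_str. A sketch, assuming the llama_model * returned during init is still in scope as model:

// read the architecture string straight from the loaded model's GGUF
// metadata ("general.architecture"), falling back to "llama" if absent
char arch_buf[64] = {0};
std::string model_hint = "llama";
if (llama_model_meta_val_str(model, "general.architecture", arch_buf, sizeof(arch_buf)) >= 0) {
    model_hint = arch_buf;
}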