working version
This commit is contained in:
parent 9e39571fc2
commit 1a088fb0a5

2 changed files with 95 additions and 65 deletions
@@ -70,7 +70,7 @@ struct callback_data {
         t_layer->data = malloc(n_bytes); // TODO @ngxson : get rid of this malloc somehow
         ggml_backend_tensor_get(t, t_layer->data, 0, n_bytes);
         ggml_set_name(t_layer, ggml_get_name(t));
-        print_debug_tensor(t_layer);
+        //print_debug_tensor(t_layer);

         if (is_eval_pos) {
             v_pos.push_back(t_layer);
@@ -99,7 +99,7 @@ struct callback_data {

     // delete zero rows from a given 2D tensor
     struct ggml_tensor * filter_nonzero_rows(struct ggml_tensor * a) {
-        printf("filter_nonzero_rows\n");
+        //printf("filter_nonzero_rows\n");
         auto is_row_all_zeros = [](struct ggml_tensor * t, int row, float eps) -> bool {
             // check if given row containing all zero elements
             int n_cols = t->ne[0]; // hint: should be equal to n_embd
@@ -119,7 +119,7 @@ struct callback_data {

         // get "n_nonzero_rows" for the output "diff_filtered"
         int n_nonzero_rows = rows_to_copy.size();
-        printf("n_nonzero_rows: %d\n", n_nonzero_rows);
+        //printf("n_nonzero_rows: %d\n", n_nonzero_rows);
         int n_embd = a->ne[0];
         GGML_ASSERT(n_nonzero_rows > 0);

@@ -138,7 +138,7 @@ struct callback_data {
             }
         }

-        print_debug_tensor(diff_filtered);
+        //print_debug_tensor(diff_filtered);

         return diff_filtered;
     }
@@ -169,7 +169,8 @@ struct train_context {

     // each element of the vector correspond to one layer
     // NOTE: the last layer is discard. therefore, we will have (n_layers - 1) elements here
-    std::vector<struct ggml_tensor *> v_diff; // vector of matrices of size [n_embd, m] where m ~ n_tokens * n_completions (v_diff contains no zero-rows)
+    // NOTE (2): v_diff is transposed from v_diff_tmp
+    std::vector<struct ggml_tensor *> v_diff; // vector of matrices of size [m, n_embd] where m ~ n_tokens * n_completions (v_diff contains no zero-rows)
     std::vector<struct ggml_tensor *> v_final; // vector of vectors of size [n_embd] to be written to file

     // to easily re-alloc when concat v_diff, we temporary store v_diff in a vector instead of a tensor
@@ -196,7 +197,7 @@ struct train_context {

     // add new rows into existing tensor in v_diff_tmp
     void concat_diff_tmp(const std::vector<struct ggml_tensor *> & diff_filtered) {
-        GGML_ASSERT(diff_filtered.size() == n_layers - 1);
+        GGML_ASSERT((int) diff_filtered.size() == n_layers - 1);
         for (int il = 0; il < n_layers - 1; il++) {
             auto t = diff_filtered[il];
             auto & diff_tmp = v_diff_tmp[il];
@@ -206,32 +207,46 @@ struct train_context {
         }
     }

-    // build the v_diff tensors from v_diff_tmp
+    // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
     void build_v_diff() {
+        printf("build_v_diff\n");
         for (int il = 0; il < n_layers - 1; il++) {
             auto & diff_tmp = v_diff_tmp[il];
             int n_elem = diff_tmp.size() / sizeof(float);
+            GGML_ASSERT(n_elem % n_embd == 0);
             int n_rows = n_elem / n_embd;
             struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
             ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            // TODO: IMPORTANT!! transpose diff
-            diff->data = diff_tmp.data();
+            // copy data & transpose
+            diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
+            float * arr = (float *) diff_tmp.data();
+            for (int ir = 0; ir < n_rows; ++ir) {
+                for (int ic = 0; ic < n_embd; ++ic) {
+                    float f = arr[ir*n_embd + ic];
+                    //std::cout << ir << "," << ic << " = " << f << "\n";
+                    ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
+                }
+            }
             v_diff.push_back(diff);
+            print_debug_tensor(diff);
+            // free memory of diff_tmp
+            diff_tmp.resize(0);
         }
     }

     ~train_context() {
         for (auto ptr : v_final) free(ptr->data);
-        // no need to free v_diff_tmp or v_diff, since we didn't use malloc
+        for (auto ptr : v_diff) free(ptr->data);
+        // no need to free v_diff_tmp, since we didn't use malloc
         ggml_free(ctx_ggml);
     }
 };

 struct ctrl_params {
     /* default meta parameters */
-    bool always_reload = false;
     int n_completions = 64;
-    int n_threads = 8;
+    int n_pca_batch = 5;
+    int n_pca_iterations = 1000;

     /* default filepaths */
     std::string outfile = "control_vector.gguf";
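Note on the "copy data & transpose" loop above: in ggml, ne[0] is the fastest-varying dimension, so a tensor created as ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_rows, n_embd) stores element (ir, ic) at flat offset ic*n_rows + ir. Writing arr[ir*n_embd + ic] into that slot via ggml_set_f32_nd is therefore a plain transpose of the row-major accumulation buffer. A minimal standalone sketch of the same index mapping (plain std::vector buffers and a hypothetical helper name, not the ggml code path):

    #include <vector>

    // dst[ic*n_rows + ir] = src[ir*n_embd + ic] — the mapping performed by
    // the ggml_set_f32_nd() loop in build_v_diff() above.
    static void transpose_rows(const std::vector<float> & src, std::vector<float> & dst,
                               int n_rows, int n_embd) {
        dst.resize((size_t) n_rows * n_embd);
        for (int ir = 0; ir < n_rows; ++ir) {
            for (int ic = 0; ic < n_embd; ++ic) {
                dst[(size_t) ic*n_rows + ir] = src[(size_t) ir*n_embd + ic];
            }
        }
    }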
@@ -295,9 +310,10 @@ static void print_usage(const char * executable) {
     printf("                           default: 'examples/control-vector-generator/completions.txt'\n");
     printf("  -nc, --num-completions N number of lines of completions file to use\n");
     printf("                           default: 64\n");
-    printf("  -t, --num-threads N      number of threads to use (do not confuse with gpt-opts -t)\n");
-    printf("                           default: 8\n");
-    printf("  --always-reload          reload the model for every new template to parse (not recommended)\n");
+    printf("  --batch-pca N            batch size used for PCA\n");
+    printf("                           default: 5\n");
+    printf("  --iter-pca N             number of iterations used for PCA\n");
+    printf("                           default: 1000\n");
     printf("\n");
     printf("gpt-opts:\n");
     printf("  other options from main\n");
@@ -370,10 +386,10 @@ static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params)
             throw std::invalid_argument("error: missing argument for " + arg);
         }
     }
-    if (arg == "--num-threads" || arg == "-t") {
+    if (arg == "--pca-batch") {
         if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
             try {
-                params.n_threads = std::stoi(argv[arg_idx]);
+                params.n_pca_batch = std::stoi(argv[arg_idx]);
             }
             catch (const std::invalid_argument & ex) {
                 throw std::invalid_argument("error: invalid argument for " + arg);
@@ -383,9 +399,18 @@ static int ctrlvec_params_parse_ex(int argc, char ** argv, ctrl_params & params)
             throw std::invalid_argument("error: missing argument for " + arg);
         }
     }
-    if (arg == "--always-reload") {
-        params.always_reload = true;
-        skipme += 1;
+    if (arg == "--pca-iter") {
+        if (++arg_idx < argc && strncmp(argv[arg_idx], arg_prefix.c_str(), 2) != 0) {
+            try {
+                params.n_pca_iterations = std::stoi(argv[arg_idx]);
+            }
+            catch (const std::invalid_argument & ex) {
+                throw std::invalid_argument("error: invalid argument for " + arg);
+            }
+            skipme += 2;
+        } else {
+            throw std::invalid_argument("error: missing argument for " + arg);
+        }
     }
     // TODO it might be nice QoL to have single positive/negative args
     // we do not handle any other unknown arguments here because they will be handled by gpt_parse_params
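Both new flags follow the same two-token pattern: a recognized "--flag VALUE" pair consumes two argv slots (skipme += 2) so the wrapper can strip them before forwarding the rest to gpt-opts parsing. A condensed sketch of that pattern, using a hypothetical helper name that is not in the diff:

    #include <stdexcept>
    #include <string>

    // Hypothetical helper condensing the "--flag VALUE" handling above:
    // returns true if the flag matched, writes the parsed int, and advances
    // the argv index and skip counter.
    static bool parse_int_flag(const std::string & flag, int argc, char ** argv,
                               int & arg_idx, int & skipme, int & out) {
        if (argv[arg_idx] != flag) return false;
        if (++arg_idx >= argc) {
            throw std::invalid_argument("error: missing argument for " + flag);
        }
        out = std::stoi(argv[arg_idx]); // throws std::invalid_argument on bad input
        skipme += 2; // flag + value are both consumed
        return true;
    }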
@@ -427,7 +452,7 @@ static std::vector<std::string> ctrlvec_load_prompt_file(std::string path, bool

 static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
     auto * cb_data = (callback_data *) user_data;
-    auto ggml_ne_string = [](const ggml_tensor * t) -> std::string {
+    /*auto ggml_ne_string = [](const ggml_tensor * t) -> std::string {
         std::string str;
         for (int i = 0; i < GGML_MAX_DIMS; ++i) {
             str += std::to_string(t->ne[i]);
@@ -436,7 +461,7 @@ static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
             }
         }
         return str;
-    };
+    };*/

     static const char * l_out_name = "l_out";
     const bool is_l_out = strncmp(t->name, l_out_name, strlen(l_out_name)) == 0;
@@ -473,6 +498,7 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const

     for (size_t i = 0; i < v_ctrl.size(); ++i) {
         gguf_add_tensor(ctx, v_ctrl[i]);
+        print_debug_tensor(v_ctrl[i]);
         printf("Added tensor: %s\n", v_ctrl[i]->name);
     }

@@ -489,7 +515,7 @@ static void export_gguf(const std::vector<struct ggml_tensor *> & v_ctrl, const
  * Load prompt files and completion file.
  * Then format each pair of prompt + completion to make an entry.
  */
-int prepare_entries(ctrl_params & cparams) {
+static int prepare_entries(ctrl_params & cparams) {
     // load prompts
     std::vector<std::string> positive_prompts = ctrlvec_load_prompt_file(cparams.positive_prompts_file);
     std::vector<std::string> negative_prompts = ctrlvec_load_prompt_file(cparams.negative_prompts_file);
@@ -511,7 +537,7 @@ int prepare_entries(ctrl_params & cparams) {
         // TODO make this dynamic - allow the user to change it somehow - and adapt based on model
         return persona + " " + suffix; // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
     };
-    for (int i = 0; i < positive_prompts.size(); ++i) {
+    for (size_t i = 0; i < positive_prompts.size(); ++i) {
         for (auto & cmpl : completions) {
             // TODO replicate the truncations done by the python implementation
             cparams.positive_entries.push_back(format_template(positive_prompts[i], cmpl));
@@ -553,7 +579,7 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
     std::tie(model, ctx) = llama_init_from_gpt_params(params);

-    int n_ctx = llama_n_ctx(ctx);
+    // int n_ctx = llama_n_ctx(ctx);
     int n_layers = llama_n_layer(model);
     int n_embd = llama_n_embd(model);
     // get model hint param (a.k.a model arch name)
@@ -574,29 +600,13 @@ int main(int argc, char ** argv) {
     // init train_context
     train_context ctx_train(n_embd, n_layers);

-    int token_ct = 0;
-
     for(size_t i = 0; i < cparams.positive_entries.size(); ++i) {
         tokenized_prompt t = tokenized_prompts[i];
         cb_data.n_layers = n_layers;
         cb_data.n_tokens = t.max_seq_len;

-        // need to reload the model so it doesn't run out of context
-        // this should scale with -c option passed by main
-        token_ct += 2 * t.max_seq_len;
-        if (token_ct > n_ctx || cparams.always_reload) {
-            //break;
-            llama_free(ctx);
-            llama_free_model(model);
-            std::tie(model, ctx) = llama_init_from_gpt_params(params);
-            token_ct = 2 * t.max_seq_len;
-        }
-        if (token_ct > n_ctx) {
-            fprintf(stderr, "context size exceeded on iteration %zu\n", i);
-            break;
-        }
-
-        printf("Evaluating prompt: \"%s\" - \"%s\" (%ld tokens)\n",
+        printf("Evaluating prompt[%ld/%ld]: \"%s\" - \"%s\" (%ld tokens)\n",
+            i+1, t.tokens_pos.size(),
             tokens_to_str(ctx, t.tokens_pos.cbegin(), t.tokens_pos.cend()).c_str(),
             tokens_to_str(ctx, t.tokens_neg.cbegin(), t.tokens_neg.cend()).c_str(),
             t.max_seq_len);
@@ -610,12 +620,10 @@ int main(int argc, char ** argv) {
         auto v_diff_filtered = cb_data.calc_diff();

         // save & concat the filtered v_diff to ctx_train
-        printf("concat_diff_tmp\n");
         ctx_train.concat_diff_tmp(v_diff_filtered);

         // reset for next iteration
         cb_data.reset();
-        printf("reset\n");
     }

     // done with the model, we can now free it to make gain some memory
@@ -628,8 +636,10 @@ int main(int argc, char ** argv) {

     // run PCA
     PCA::pca_params pca_params;
+    pca_params.n_threads = params.n_threads;
+    pca_params.n_batch = cparams.n_pca_batch;
+    pca_params.n_iterations = cparams.n_pca_iterations;
     PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
-    exit(0); // TODO: REMOVE ME !!!!!!!!!!!!!!!!!!!!!!!!

     // write output vectors to gguf
     export_gguf(ctx_train.v_final, cparams.outfile, model_hint);

@@ -38,10 +38,15 @@ struct pca_params {
     int n_batch = 5; // number of iterations do to in one batch. larger the batch, more memory is used
     int n_iterations = 1000;
     float tolerance = 1e-7;
+
+    // for debugging
+    int i_layer = 0;
+    int n_layers = 0;
 };

 // result from each iteration
 struct pca_result {
+    struct ggml_tensor * calculated_square = NULL;
     std::vector<struct ggml_tensor *> eigenvectors;
     std::vector<float> distances;
 };
@@ -162,7 +167,6 @@ static struct ggml_cgraph * build_graph_piter(
     // turn v_diff_original into square matrix if needed
     struct ggml_tensor * tmp_square;
     if (calc_square) {
-        print_debug_tensor(model.dev_input);
         tmp_square = ggml_mul_mat(ctx0, model.dev_input, model.dev_input);
         ggml_set_name(tmp_square, "tmp_square");
     }
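For context on tmp_square: ggml_mul_mat(ctx0, a, b) requires a and b to share ne[0] and produces a tensor of shape [a->ne[1], b->ne[1]], where entry (i, j) is the dot product over the shared dimension. Multiplying dev_input of shape [n_samples, n_embd] by itself therefore yields the n_embd x n_embd Gram (covariance-like) matrix, whose dominant eigenvector is the PCA direction. A plain-array sketch of that product (hypothetical helper over a row-major [n_samples x n_embd] buffer, not the ggml path):

    #include <vector>

    // square[i*n_embd + j] = sum over samples of input[s][i] * input[s][j]
    static void gram_matrix(const std::vector<float> & input, std::vector<float> & square,
                            int n_samples, int n_embd) {
        square.assign((size_t) n_embd * n_embd, 0.0f);
        for (int s = 0; s < n_samples; ++s) {
            for (int i = 0; i < n_embd; ++i) {
                for (int j = 0; j < n_embd; ++j) {
                    square[(size_t) i*n_embd + j] +=
                        input[(size_t) s*n_embd + i] * input[(size_t) s*n_embd + j];
                }
            }
        }
    }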
@@ -229,17 +233,17 @@ static ggml_status compute_piter(
         }
         return i;
     };
-    // get output nodes
+    result.calculated_square = NULL;
     result.eigenvectors.clear();
     result.distances.clear();
     result.eigenvectors.resize(params.n_batch);
     result.distances.resize(params.n_batch);
+    // get output nodes
     for (int i = 0; i < gf->n_nodes; ++i) {
         auto node = gf->nodes[i];
         int iter = -1;
         // find b_tensor (without copying data from device)
         if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) {
-            print_debug_tensor(node, false);
             result.eigenvectors[iter] = node;
         }
         // find distances, then copy data from device
@@ -247,7 +251,11 @@ static ggml_status compute_piter(
             float d;
             ggml_backend_tensor_get(node, &d, 0, sizeof(float));
             result.distances[iter] = d;
-            std::cout << node->name << " = " << d << "\n";
+            // std::cout << node->name << " = " << d << "\n";
+        }
+        // find tmp_square if it exists (without copying data from device)
+        if (std::string(node->name) == "tmp_square") {
+            result.calculated_square = node;
         }
     }
 }
@@ -258,23 +266,22 @@ static void power_iteration(
     const struct pca_params & params,
     struct ggml_tensor * input, // shape of input: [n_samples, n_embd]
     struct ggml_tensor * output) {
-    printf("in power iteration\n");
-    //int n_embd = input->ne[1];
+    //printf("in power iteration\n");
     struct pca_model model(input);

     ggml_gallocr_t allocr = NULL;
     struct pca_result result;
-    struct ggml_tensor * last_eigenvector;
+    struct ggml_tensor * last_eigenvector = NULL;

-    int n_iter = params.n_iterations / params.n_batch; // more batch, fewer iterations
-    for (int iter = 0; iter < n_iter; ++iter) {
+    int n_iters = params.n_iterations / params.n_batch; // more batch, fewer iterations
+    for (int iter = 0; iter < n_iters; ++iter) {
         bool calc_square = (iter == 0); // only need to calculate square for first iteration
         if (allocr) {
             ggml_gallocr_free(allocr);
         }
         allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
         struct ggml_cgraph * gf = build_graph_piter(params, model, calc_square);
-        ggml_graph_dump_dot(gf, nullptr, "/tmp/_cgraph.dot");
+        // ggml_graph_dump_dot(gf, nullptr, "/tmp/_cgraph.dot");
         compute_piter(params, model, gf, allocr, result);

         for (size_t k = 0; k < result.distances.size(); ++k) {
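What each batched graph repeats is the classic power-iteration update b <- A*b / ||A*b||, whose fixed point is the dominant eigenvector of A (here the square matrix computed on the first iteration). A minimal unbatched CPU sketch under that assumption (plain arrays, not the ggml graph):

    #include <cmath>
    #include <vector>

    // One power-iteration step on a row-major n x n matrix A: b = normalize(A*b).
    // The ggml graph chains params.n_batch of these steps per evaluation.
    static void power_iter_step(const std::vector<float> & A, std::vector<float> & b, int n) {
        std::vector<float> tmp(n, 0.0f);
        for (int r = 0; r < n; ++r) {
            for (int c = 0; c < n; ++c) {
                tmp[r] += A[(size_t) r*n + c] * b[c];
            }
        }
        float norm = 0.0f;
        for (int r = 0; r < n; ++r) norm += tmp[r]*tmp[r];
        norm = std::sqrt(norm);
        for (int r = 0; r < n; ++r) b[r] = tmp[r] / norm;
    }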
@@ -284,30 +291,43 @@ static void power_iteration(
             }
         }

-        break; // FIXME
+        if (calc_square) {
+            // copy and store the square matrix if needed
+            GGML_ASSERT(result.calculated_square != NULL);
+            std::vector<uint8_t> tmp_buf(ggml_nbytes(model.dev_square));
+            ggml_backend_tensor_get(result.calculated_square, tmp_buf.data(), 0, tmp_buf.size());
+            ggml_backend_tensor_set(model.dev_square, tmp_buf.data(), 0, tmp_buf.size());
+        }
+
+        printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n",
+            __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch);
     }

+    // get output tensor
+    GGML_ASSERT(last_eigenvector);
     ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
-    print_debug_tensor(output);
+    //print_debug_tensor(output);
     ggml_gallocr_free(allocr);
 }

 static void run_pca(
-    const struct pca_params & params,
-    const std::vector<struct ggml_tensor *> & v_input,
+    struct pca_params & params,
+    const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_samples, n_embd]
     const std::vector<struct ggml_tensor *> & v_output) {
     printf("Running PCA...\n");
-    int n_embd = v_input[0]->ne[0]; // shape of v_input[0]: [n_embd, m]
     for (size_t il = 0; il < v_input.size(); ++il) {
-        print_debug_tensor(v_input[il]);
         // prepare output vector
         struct ggml_tensor * ctrl_out = v_output[il];
         auto name = std::string("direction.") + std::to_string(il + 1);
         ggml_set_name(ctrl_out, name.c_str());

         // run power_iteration
+        params.i_layer = il;
+        params.n_layers = v_input.size();
         power_iteration(params, v_input[il], ctrl_out);
-        printf("Done with layer %d\n", il);
-        print_debug_tensor(ctrl_out);
+        printf("DONE layer %ld / %ld\n", il+1, v_input.size());
+        //print_debug_tensor(ctrl_out);
     }
     printf("Done with PCA.\n");
 }
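The calc_square branch above persists the computed square matrix by bouncing it through host memory into model.dev_square, since the graph's tensors are re-allocated on the next iteration. The same round-trip as a small helper (a sketch with a hypothetical name; assumes src and dst have identical size and type):

    #include "ggml.h"
    #include "ggml-backend.h"
    #include <cstdint>
    #include <vector>

    // Copy one backend tensor's contents into another via a host staging
    // buffer, as done for model.dev_square in power_iteration() above.
    static void backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst) {
        std::vector<uint8_t> buf(ggml_nbytes(dst));
        ggml_backend_tensor_get(src, buf.data(), 0, buf.size());
        ggml_backend_tensor_set(dst, buf.data(), 0, buf.size());
    }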