From 15d5c257a04ff7a1cfb6e32fb9f5dcd71bd2e83b Mon Sep 17 00:00:00 2001
From: ngxson
Date: Sun, 2 Jun 2024 10:58:11 +0200
Subject: [PATCH] fix cb_eval

---
 .../control-vector-generator.cpp              | 43 +++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index f48e5ef3f..e2ee6208d 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -18,10 +18,13 @@
 #include
 #include
 
+#define DEBUG_POS 2
+
 // TODO read everything over and make sure it makes sense because I'm dropping logic errors left and right - Christian
 struct callback_data {
     std::vector data;
 
+    ggml_context * ctx_ggml;
     int n_tokens = 0;
     int n_embd = 0;
@@ -290,18 +293,11 @@ static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
     // v_pos and v_neg are being populated, but the values aren't correct - it writes the same values to all vectors, it looks like?
     // this leads ultimately to an error in calc_diff where diff becomes entirely zeroes and eventually a segfault several iterations into pca
     struct ggml_tensor * t_host;
-    if (!is_host) {
-        auto n_bytes = ggml_nbytes(t);
-        struct ggml_init_params params = {
-            /*.mem_size   =*/ n_bytes,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
-        };
-        struct ggml_context * ctx_data = ggml_init(params);
-        t_host = ggml_new_tensor_2d(ctx_data, t->type, t->ne[0], t->ne[1]);
-        ggml_backend_tensor_get(t, t_host->data, 0, n_bytes);
-    }
-    else t_host = t;
+    auto n_bytes = ggml_nbytes(t);
+    t_host = ggml_new_tensor_2d(cb_data->ctx_ggml, t->type, t->ne[0], t->ne[1]);
+    t_host->data = malloc(n_bytes); // TODO @ngxson : get rid of this malloc somehow
+    ggml_backend_tensor_get(t, t_host->data, 0, n_bytes);
+    printf("t_host [0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(t_host, 0, DEBUG_POS, 0, 0));
 
     if (t_host->type == GGML_TYPE_F32) {
         if (cb_data->is_eval_pos) {
@@ -315,6 +311,7 @@ static bool cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
 }
 
 static bool get_hidden_layers(llama_context * ctx, std::vector<llama_token> & tokens) {
+    llama_kv_cache_clear(ctx);
     if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size(), 0, 0))) {
         fprintf(stderr, "%s : failed to eval\n", __func__);
         return false;
@@ -355,7 +352,8 @@ static void calc_diff(callback_data & cb_data) {
         };
         struct ggml_context * ctx_data = ggml_init(params);
 
-        printf("inp_pos [0][0]: %f\n", ggml_get_f32_nd(inp_pos, 0, 0, 0, 0));
+        printf("inp_pos [0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(inp_pos, 0, DEBUG_POS, 0, 0));
+        printf("inp_neg [0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(inp_neg, 0, DEBUG_POS, 0, 0));
 
         // TODO is this the best way to get dimension? i don't know which way n_embd/n_tokens go
         // for that matter can we get rid of n_embd/n_tokens fields in favor of ne[0]/ne[1]?
@@ -367,7 +365,7 @@ static void calc_diff(callback_data & cb_data) {
             }
         }
 
-        printf("dest [0][0]: %f\n", ggml_get_f32_nd(dest, 0, 0, 0, 0));
+        printf("dest [0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(dest, 0, DEBUG_POS, 0, 0));
 
         // TODO can we make this faster? like check during the above operation rather than on a second pass?
@@ -415,6 +413,7 @@ static void calc_diff(callback_data & cb_data) {
 }
 
 static void concatenate_diffs(callback_data & cb_data) {
+    printf("concatenate_diffs\n");
     for (size_t i = 0; i < cb_data.v_diffs_wrapped.size(); ++i) {
         std::vector & vec = cb_data.v_diffs_wrapped[i];
         size_t n_rows_total = 0;
@@ -756,6 +755,14 @@ int main(int argc, char ** argv) {
     cb_data.n_embd = n_embd;
     int n_prompts = cparams.positive_prompts.size();
 
+    // init ctx_ggml
+    struct ggml_init_params params_ggml = {
+        /*.mem_size   =*/ ggml_tensor_overhead() * n_prompts * n_layers * 4u,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ true,
+    };
+    cb_data.ctx_ggml = ggml_init(params_ggml);
+
     // create templated prompts
     for (int i = 0; i < n_prompts; ++i) {
         populate_entries(cparams, cparams.positive_prompts[i], cparams.negative_prompts[i]);
@@ -804,13 +811,15 @@ int main(int argc, char ** argv) {
         calc_diff(cb_data);
 
         // reset for next iteration
-        // TODO there's no good way to do this is there? because you need to ggml_free the underlying ggml_context
-        //for (auto ptr : cb_data.v_pos) free(ptr->data);
-        //for (auto ptr : cb_data.v_neg) free(ptr->data);
+        // TODO @ngxson : find a more proper way to alloc / free tensors
+        for (auto ptr : cb_data.v_pos) free(ptr->data);
+        for (auto ptr : cb_data.v_neg) free(ptr->data);
         cb_data.v_pos.clear();
         cb_data.v_neg.clear();
     }
 
+    printf("Done evaluating prompts\n");
+
     concatenate_diffs(cb_data);
     pca(cb_data, cparams.n_threads);
     //printf("v_final %f %f \n", cb_data.v_final[0][0], cb_data.v_final[0][1]);
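
Note (not part of the commit): below is a minimal, self-contained sketch of the allocation pattern this patch switches to, a long-lived metadata-only ggml context created with no_alloc = true and sized by ggml_tensor_overhead(), a manually malloc'd data buffer per tensor, a device-to-host copy via ggml_backend_tensor_get, and an explicit free() of each buffer when the tensors are reset. It assumes a standalone ggml build with the CPU backend; the names ctx_src, ctx_host, t_host and the tensor sizes are illustrative, not taken from the example code.

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include <cstdio>
#include <cstdlib>
#include <vector>

int main() {
    const int64_t n_embd = 8, n_tokens = 4;

    // source tensor living in a backend buffer, standing in for the
    // activation tensor `t` that cb_eval receives (CPU backend here)
    ggml_backend_t backend = ggml_backend_cpu_init();
    struct ggml_init_params src_params = {
        /*.mem_size   =*/ 2 * ggml_tensor_overhead(), // room for the tensor header only
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,                       // data lives in the backend buffer
    };
    struct ggml_context * ctx_src = ggml_init(src_params);
    struct ggml_tensor * t = ggml_new_tensor_2d(ctx_src, GGML_TYPE_F32, n_embd, n_tokens);
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_src, backend);

    std::vector<float> values(n_embd * n_tokens);
    for (size_t i = 0; i < values.size(); ++i) values[i] = (float) i;
    ggml_backend_tensor_set(t, values.data(), 0, ggml_nbytes(t));

    // the pattern from the patch: one long-lived, metadata-only context
    // (no_alloc = true, sized by tensor overhead), plus a manually malloc'd
    // data buffer per tensor, filled by a device-to-host copy
    struct ggml_init_params host_params = {
        /*.mem_size   =*/ 2 * ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx_host = ggml_init(host_params);

    struct ggml_tensor * t_host = ggml_new_tensor_2d(ctx_host, t->type, t->ne[0], t->ne[1]);
    t_host->data = malloc(ggml_nbytes(t));                        // manual buffer (the patch's TODO)
    ggml_backend_tensor_get(t, t_host->data, 0, ggml_nbytes(t));  // copy out of the backend

    printf("t_host [0][2]: %f\n", ggml_get_f32_nd(t_host, 0, 2, 0, 0));

    // cleanup mirrors the "reset for next iteration" block in main():
    // free each manual buffer before dropping the contexts
    free(t_host->data);
    ggml_free(ctx_host);
    ggml_backend_buffer_free(buf);
    ggml_free(ctx_src);
    ggml_backend_free(backend);
    return 0;
}

Sizing the metadata context by ggml_tensor_overhead() times the number of tensors it will ever hold is what the hunk at main() does with n_prompts * n_layers * 4u; the malloc/free pair is the part the TODO comments in the patch still want to replace with a proper allocator.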