From 23fd1b587cb91af06a6e5800282f20ebbf6f7e72 Mon Sep 17 00:00:00 2001
From: Christian Zhou-Zheng
Date: Mon, 3 Jun 2024 21:14:43 -0400
Subject: [PATCH] update debug statements

---
 .../control-vector-generator.cpp              | 32 ++++++++-----------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index 94c1939f8..59335f826 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -22,7 +22,7 @@
 
 // TODO read everything over and make sure it makes sense because I'm dropping logic errors left and right - Christian
 
-// to reduce the amount of stuff that gets sent to cb_eval I separated it somewhat - Christian
+// to reduce the amount of stuff that gets sent to cb_eval, this holds only what cb_eval actually needs
 struct callback_data {
     std::vector<uint8_t> data;
     ggml_context * ctx_ggml;   // holds v_pos, v_neg
@@ -34,11 +34,13 @@ struct callback_data {
     std::vector<struct ggml_tensor *> v_pos; // vector of matrices of size [n_embd, n_tokens]
     std::vector<struct ggml_tensor *> v_neg; // vector of matrices of size [n_embd, n_tokens]
 
+    // TODO I free everything as soon as it's unnecessary, rather than letting this live until the end of main() - is this undesirable?
+    /*
     ~callback_data() {
         for (auto ptr : v_pos) free(ptr);
         for (auto ptr : v_neg) free(ptr);
         ggml_free(ctx_ggml);
-    }
+    }*/
 };
 
 // I prefer having the different contexts so we can free each immediately after we're done using it
@@ -51,7 +53,7 @@ struct diff_ctx {
     ggml_context * ctx_diffs_wrapped; // holds v_diffs_wrapped
     ggml_context * ctx_diff;          // holds v_diff
     ggml_context * ctx_final;         // holds v_final
-    
+
     // each element of the vector corresponds to one layer
     std::vector<struct ggml_tensor *> v_diff;  // vector of matrices of size [n_embd, m] where m ~ n_tokens * n_completions
     std::vector<struct ggml_tensor *> v_final; // vector of vectors of size [n_embd] to be written to file
@@ -62,10 +64,9 @@ struct diff_ctx {
     ~diff_ctx() {
         for (auto ptr : v_diff) free(ptr);
         for (auto ptr : v_final) free(ptr);
-        for (auto & vec : v_diffs_wrapped) for (auto ptr : vec) free(ptr);
-        ggml_free(ctx_diffs_wrapped);
         ggml_free(ctx_diff);
         ggml_free(ctx_final);
+        // ctx_diffs_wrapped is freed in concatenate_diffs as soon as we're done with it - see above. undesirable?
     }
 };
 
@@ -396,8 +397,6 @@ static void concatenate_diffs(diff_ctx & dctx) {
         printf("il: %zu of %zu\n", il, dctx.v_diffs_wrapped.size()-1);
         std::vector<struct ggml_tensor *> & vec = dctx.v_diffs_wrapped[il];
 
-        std::cout << "vec size: " << vec.size() << std::endl;
-
         // strip zero rows
         int n_nonzero_rows = 0;
         std::vector<std::vector<int>> nonzero_rows; // outer vector is tensor idx, inner vector is row in tensor
@@ -411,7 +410,7 @@ static void concatenate_diffs(diff_ctx & dctx) {
             }
         }
 
-        std::cout << "n_nonzero_rows: " << n_nonzero_rows << std::endl;
+        printf("n_nonzero_rows: %d\n", n_nonzero_rows);
 
         // we transpose it here because ggml mul_mat is really weird
         struct ggml_tensor * diff = ggml_new_tensor_2d(dctx.ctx_diff, GGML_TYPE_F32, n_nonzero_rows, dctx.n_embd);
@@ -427,13 +426,14 @@
             }
         }
 
-        printf("diff[0][1]: %f\n", ggml_get_f32_nd(diff, 0, 1, 0, 0));
+        printf("diff[0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(diff, 0, DEBUG_POS, 0, 0));
 
         // TODO assert row == n_nonzero_rows
         dctx.v_diff.push_back(diff);
     }
 
-    ggml_free(dctx.ctx_diffs_wrapped);
+    //for (auto & vec : dctx.v_diffs_wrapped) for (auto ptr : vec) free(ptr);
+    ggml_free(dctx.ctx_diffs_wrapped);
 }
 
 // TODO translate everything below this
@@ -473,7 +473,7 @@ void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
         model.backend = ggml_backend_cpu_init();
     }
 
-    printf("v_diff_original[0][1]: %f\n", ggml_get_f32_nd(v_diff_original, 0, 1, 0, 0));
+    printf("v_diff_original[0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(v_diff_original, 0, DEBUG_POS, 0, 0));
 
     const int num_tensors = 4;
 
@@ -529,7 +529,6 @@ struct ggml_cgraph * square_diff_graph(const pca_model & model) {
     return gf;
 }
 
-// TODO do this before pca so the pca_model is easier to malloc?
 struct ggml_tensor * compute_square(const pca_model & model, ggml_gallocr_t allocr, int n_threads) {
     struct ggml_cgraph * gf = square_diff_graph(model);
 
@@ -598,14 +597,12 @@ static void power_iteration(diff_ctx & dctx, int idx, int maxIterations = 1000,
     pca_model model;
     load_pca_model(model, dctx.v_diff[idx]);
 
-    std::cout << "model.v_diff_original->ne[0]: " << model.v_diff_original->ne[0] << std::endl;
 
     ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
 
     struct ggml_tensor * square = compute_square(model, allocr, dctx.n_threads);
     ggml_backend_tensor_set(model.square, square->data, 0, ggml_nbytes(model.square));
 
-    // yes?
     ggml_gallocr_free(allocr);
 
     struct ggml_init_params host_params = {
@@ -683,7 +680,7 @@ static void export_gguf(diff_ctx & dctx, int n_layers, const std::string fname,
         // i'm pretty sure it's right now
         const std::string name = "direction." + to_string(i+1);
 
-        std::cout << "dctx.v_final[i][0][1]: " << ggml_get_f32_nd(dctx.v_final[i], 0, 1, 0, 0) << std::endl;
+        printf("dctx.v_final[i][%d]: %f\n", DEBUG_POS, ggml_get_f32_1d(dctx.v_final[i], DEBUG_POS));
 
         ggml_set_name(dctx.v_final[i], name.c_str());
 
@@ -838,15 +835,14 @@ int main(int argc, char ** argv) {
     }
 
     // TODO we can actually delete cb_data here
-    //ggml_free(cb_data.ctx_ggml);
 
-    printf("dctx.v_diffs_wrapped[0][0][2]: %f\n", ggml_get_f32_nd(dctx.v_diffs_wrapped[0][0], 0, 2, 0, 0));
+    printf("dctx.v_diffs_wrapped[0][0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(dctx.v_diffs_wrapped[0][0], 0, DEBUG_POS, 0, 0));
 
     printf("Done evaluate prompts\n");
 
     concatenate_diffs(dctx);
 
-    printf("dctx.v_diff[0][0][1]: %f\n", ggml_get_f32_nd(dctx.v_diff[0], 0, 1, 0, 0));
+    printf("dctx.v_diff[0][0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(dctx.v_diff[0], 0, DEBUG_POS, 0, 0));
 
     printf("Done concatenate diffs\n");
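
Note: the updated debug statements all read token position DEBUG_POS instead of the
previously hardcoded indices (1, and 2 in one place). DEBUG_POS itself is not defined
anywhere in this patch, so it is assumed to come from elsewhere in
control-vector-generator.cpp. A minimal sketch of that assumption, with a hypothetical
value chosen only to mirror the old hardcoded index:

    // assumed to be defined near the top of control-vector-generator.cpp;
    // the actual value is not shown in this patch
    #define DEBUG_POS 1

    // every updated statement then prints element [0][DEBUG_POS] of an F32 matrix, e.g.:
    printf("diff[0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(diff, 0, DEBUG_POS, 0, 0));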