diff --git a/examples/control-vector-generator/control-vector-generator.cpp b/examples/control-vector-generator/control-vector-generator.cpp
index 35d607a59..ebff76edc 100644
--- a/examples/control-vector-generator/control-vector-generator.cpp
+++ b/examples/control-vector-generator/control-vector-generator.cpp
@@ -94,7 +94,7 @@ struct callback_data {
         auto diff_filtered = filter_nonzero_rows(v_pos[il]);
         v_diff_filtered.push_back(diff_filtered);
     }
-    return v_pos; // for convinient, we return the result std::vector
+    return v_diff_filtered; // for convenience, we return the resulting std::vector
 }
 
 // delete zero rows from a given 2D tensor
@@ -624,7 +624,7 @@ int main(int argc, char ** argv) {
     ctx_train.build_v_diff();
 
     // run PCA
-    pca(ctx_train.v_diff, ctx_train.v_final);
+    PCA::run_pca(ctx_train.v_diff, ctx_train.v_final);
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, cparams.outfile, model_hint);
diff --git a/examples/control-vector-generator/pca.hpp b/examples/control-vector-generator/pca.hpp
index 47c8981a2..f279268ce 100644
--- a/examples/control-vector-generator/pca.hpp
+++ b/examples/control-vector-generator/pca.hpp
@@ -29,12 +29,11 @@ static void print_debug_tensor(struct ggml_tensor * t) {
     printf(" ... ]\n");
 }
 
-
+namespace PCA {
 
 struct pca_model {
     struct ggml_tensor * v_diff_original;
     struct ggml_tensor * square;
-    struct ggml_tensor * square_transpose;
     struct ggml_tensor * eigenvector;
 
     ggml_backend_t backend = NULL;
@@ -42,7 +41,7 @@ struct pca_model {
     struct ggml_context * ctx;
 };
 
-void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
+void load_pca_model(pca_model & model, struct ggml_tensor * input) {
 #ifdef GGML_USE_CUDA
     fprintf(stderr, "%s: using CUDA backend\n", __func__);
     model.backend = ggml_backend_cuda_init(0); // init device 0
@@ -64,35 +63,35 @@ void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
     if (!model.backend) {
         model.backend = ggml_backend_cpu_init();
     }
-
-    //printf("v_diff_original[0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(v_diff_original, 0, DEBUG_POS, 0, 0));
 
     const int num_tensors = 4;
     struct ggml_init_params params {
-        /*.mem_size =*/ ggml_tensor_overhead() * num_tensors,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc =*/ true,
+        /*.mem_size   =*/ ggml_tensor_overhead() * num_tensors,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ true,
     };
-
     model.ctx = ggml_init(params);
 
-    model.v_diff_original = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[0], v_diff_original->ne[1]);
-    model.square = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1], v_diff_original->ne[1]);
-    model.square_transpose = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1], v_diff_original->ne[1]);
-    model.eigenvector = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1]);
+    auto n_embd = input->ne[0];
+    auto n_samples = input->ne[1];
+
+    model.v_diff_original = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_embd, n_samples);
+    model.square = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_embd, n_embd);
+    model.eigenvector = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, n_embd);
+
+    ggml_set_name(model.v_diff_original, "v_diff_original");
+    ggml_set_name(model.square, "square");
+    ggml_set_name(model.eigenvector, "eigenvector");
 
     model.buffer = ggml_backend_alloc_ctx_tensors(model.ctx, model.backend);
 
-    ggml_backend_tensor_set(model.v_diff_original, v_diff_original->data, 0, ggml_nbytes(v_diff_original));
-
-    // no need to load anything into square or square_transpose yet
+    ggml_backend_tensor_set(model.v_diff_original, input->data, 0, ggml_nbytes(input));
 
     // initialize model.eigenvector to random vector
     std::vector<float> random_vec;
     std::default_random_engine generator(static_cast<unsigned int>(std::time(0)));
     std::uniform_real_distribution<float> distribution(0.0, 1.0);
-    for (int i = 0; i < v_diff_original->ne[1]; ++i) {
+    for (int i = 0; i < ggml_nelements(model.eigenvector); ++i) {
         random_vec.push_back(distribution(generator));
     }
@@ -100,8 +99,12 @@ void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
     ggml_backend_tensor_set(model.eigenvector, random_vec.data(), 0, ggml_nbytes(model.eigenvector));
 }
 
-struct ggml_cgraph * square_diff_graph(const pca_model & model) {
-    static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
+static struct ggml_cgraph * build_graph_piter(
+        const pca_model & model,
+        bool calc_square = false,
+        int nb_iterations = 1) {
+    GGML_ASSERT(nb_iterations > 0);
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
     static std::vector<uint8_t> buf(buf_size);
 
     struct ggml_init_params params0 = {
@@ -109,21 +112,55 @@
         /*.mem_buffer =*/ buf.data(),
         /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_allocr_alloc_graph()
     };
+    // create a temporary context to build the graph
     struct ggml_context * ctx0 = ggml_init(params0);
-    struct ggml_cgraph  * gf = ggml_new_graph(ctx0);
+    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
 
-    struct ggml_tensor * square = ggml_mul_mat(ctx0, model.v_diff_original, model.v_diff_original);
-    //struct ggml_tensor * square_transpose = ggml_transpose(ctx0, square);
+    // turn v_diff_original into square matrix if needed
+    if (calc_square) {
+        //struct ggml_tensor * v_diff_transposed = ggml_transpose(ctx0, model.v_diff_original);
+        struct ggml_tensor * square = ggml_mul_mat(ctx0, model.v_diff_original, model.v_diff_original);
+        ggml_set_name(square, "square");
+        //model.square = ggml_scale_inplace(ctx0, model.square, 0.0);
+    }
 
-    ggml_build_forward_expand(gf, square);
+    struct ggml_tensor * b_tensor;
+    for (int i = 0; i < nb_iterations; ++i) {
+        // b_tensor = square * eigenvector^T
+        b_tensor = ggml_mul_mat(ctx0, model.square, model.eigenvector);
+        ggml_set_name(b_tensor, "b_tensor");
+
+        // normalize
+        b_tensor = ggml_div_inplace(ctx0,
+            b_tensor,
+            ggml_sqrt_inplace(ctx0, ggml_sum_rows(ctx0, ggml_sqr(ctx0, b_tensor)))
+        );
+    }
+
+    // calculate distance
+    struct ggml_tensor * distance;
+    {
+        distance = ggml_sub(ctx0, model.eigenvector, b_tensor);
+        ggml_set_name(distance, "distance");
+        distance = ggml_sqrt_inplace(ctx0,
+            ggml_sum_rows(ctx0, ggml_sqr_inplace(ctx0, distance)));
+    }
+
+    // build operations nodes
+    ggml_build_forward_expand(gf, distance);
 
+    // delete the temporary context used to build the graph
     ggml_free(ctx0);
     return gf;
 }
 
-struct ggml_tensor * compute_square(const pca_model & model, ggml_gallocr_t allocr, int n_threads) {
-    struct ggml_cgraph * gf = square_diff_graph(model);
-
+struct ggml_tensor * compute_piter(
+        const pca_model & model,
+        struct ggml_cgraph * gf,
+        ggml_gallocr_t allocr,
+        int n_threads) {
+    // allocate tensors
     ggml_gallocr_alloc_graph(allocr, gf);
 
     if (ggml_backend_is_cpu(model.backend)) {
@@ -138,68 +175,26 @@ struct ggml_tensor * compute_square(const pca_model & model, ggml_gallocr_t allo
     ggml_backend_graph_compute(model.backend, gf);
 
+    // in this case, the output tensor is the last one in the graph
     return gf->nodes[gf->n_nodes - 1];
 }
 
-struct ggml_cgraph * power_iteration_graph(const pca_model & model, float tolerance) {
-    static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
-    static std::vector<uint8_t> buf(buf_size);
-
-    struct ggml_init_params params0 = {
-        /*.mem_size =*/ buf_size,
-        /*.mem_buffer =*/ buf.data(),
-        /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_allocr_alloc_graph()
-    };
-    struct ggml_context * ctx0 = ggml_init(params0);
-    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
-
-    struct ggml_tensor * b_tensor = ggml_mul_mat(ctx0, model.square, model.eigenvector);
-    // TODO difference between ggml_norm and ggml_norm_inplace?
-    // also is this the right way to do multi-step graphs?
-    b_tensor = ggml_norm_inplace(ctx0, b_tensor, tolerance);
-
-    ggml_build_forward_expand(gf, b_tensor);
-
-    ggml_free(ctx0);
-    return gf;
-}
-
-struct ggml_tensor * compute_piter(const pca_model & model, ggml_gallocr_t allocr, int n_threads, float tolerance) {
-    struct ggml_cgraph * gf = power_iteration_graph(model, tolerance);
-
-    ggml_gallocr_alloc_graph(allocr, gf);
-
-    if (ggml_backend_is_cpu(model.backend)) {
-        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
-    }
-
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
-    ggml_backend_graph_compute(model.backend, gf);
-
-    return gf->nodes[gf->n_nodes - 1];
-}
-
-static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * output, int n_threads, int maxIterations = 1000, float tolerance = 1e-7) {
+static void power_iteration(
+        struct ggml_tensor * input,
+        struct ggml_tensor * output,
+        int n_threads,
+        int maxIterations = 1000,
+        float tolerance = 1e-7) {
     printf("in power iteration\n");
-    int n_embd = input->ne[0];// shape of input: [n_embd, m]
+    int n_embd = input->ne[0]; // shape of input: [n_embd, m]
 
     pca_model model;
     load_pca_model(model, input);
 
-    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
-
-    struct ggml_tensor * square = compute_square(model, allocr, n_threads);
-    ggml_backend_tensor_set(model.square, square->data, 0, ggml_nbytes(model.square));
-
-    ggml_gallocr_free(allocr);
+    ggml_gallocr_t allocr = NULL;
 
     struct ggml_init_params host_params = {
-        /*.mem_size =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 2u,
+        /*.mem_size =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 4u,
        /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ false,
     };
@@ -209,33 +204,19 @@ static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * out
     struct ggml_tensor * host_new_eigenvector = ggml_new_tensor_1d(host_ctx, GGML_TYPE_F32, n_embd);
 
     for (int iter = 0; iter < maxIterations; ++iter) {
-
-        // TODO do I need to reset it like this every time?
+        if (allocr) {
+            ggml_gallocr_free(allocr);
+        }
         allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
+        struct ggml_cgraph * gf = build_graph_piter(model, iter == 0);
+        printf("kkk\n");
+        ggml_graph_dump_dot(gf, nullptr, "/tmp/_cgraph.dot");
+        struct ggml_tensor * distance = compute_piter(model, gf, allocr, n_threads);
 
-        struct ggml_tensor * b_tensor = compute_piter(model, allocr, n_threads, tolerance);
-
-        ggml_backend_tensor_get(b_tensor, host_new_eigenvector->data, 0, ggml_nbytes(b_tensor));
-        ggml_backend_tensor_get(model.eigenvector, host_old_eigenvector->data, 0, ggml_nbytes(model.eigenvector));
-
-        // convergence check
-        float diff = 0.0;
-        for (int i = 0; i < n_embd; ++i) {
-            diff += std::pow((ggml_get_f32_1d(host_new_eigenvector, i) - ggml_get_f32_1d(host_old_eigenvector, i)), 2);
-        }
-
-        // update eigenvector
-        ggml_backend_tensor_set(model.eigenvector, host_new_eigenvector->data, 0, ggml_nbytes(model.eigenvector));
-
-        try {
-            if (std::sqrt(diff) < tolerance) {
-                break;
-            }
-        }
-        catch (std::exception & e) {
-            // catch division by zero I guess
-            break;
-        }
+        ggml_backend_tensor_get(distance, host_new_eigenvector->data, 0, ggml_nbytes(distance));
+        print_debug_tensor(host_new_eigenvector);
+
+        break; // FIXME
     }
 
     ggml_backend_tensor_get(model.eigenvector, output->data, 0, ggml_nbytes(model.eigenvector));
@@ -245,11 +226,12 @@ static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * out
     ggml_free(model.ctx);
     ggml_backend_buffer_free(model.buffer);
     ggml_backend_free(model.backend);
+    exit(0);
 }
 
-static void pca(
-    const std::vector<struct ggml_tensor *> & v_input,
-    const std::vector<struct ggml_tensor *> & v_output) {
+static void run_pca(
+        const std::vector<struct ggml_tensor *> & v_input,
+        const std::vector<struct ggml_tensor *> & v_output) {
     printf("Running PCA...\n");
     int n_embd = v_input[0]->ne[0]; // shape of v_input[0]: [n_embd, m]
     int n_threads = 8; // TODO: change me
@@ -265,3 +247,5 @@ static void pca(
     }
     printf("Done with PCA.\n");
 }
+
+}
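
Note on the math behind the patch: build_graph_piter folds the old two-graph design (square_diff_graph plus power_iteration_graph) into a single graph that chains one or more power-iteration steps and also computes the convergence distance on the backend, so the host no longer copies two eigenvectors back per iteration just to measure the change. For reference, here is a minimal plain-C++ sketch of the update the ggml graph encodes; the helper name piter_step and the 2x2 test matrix are illustrative assumptions, not part of this patch.

#include <cmath>
#include <cstdio>
#include <vector>

// One power-iteration step on an n x n matrix A (row-major):
//   b = A * v;  b /= ||b||;  dist = ||v - b||;  v = b
// This mirrors what the graph expresses with ggml_mul_mat,
// ggml_sqr, ggml_sum_rows, ggml_sqrt, ggml_div and ggml_sub.
static float piter_step(const std::vector<float> & A, std::vector<float> & v, int n) {
    std::vector<float> b(n, 0.0f);
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            b[i] += A[i * n + j] * v[j];
        }
    }
    float norm = 0.0f;
    for (int i = 0; i < n; ++i) norm += b[i] * b[i];
    norm = std::sqrt(norm);
    for (int i = 0; i < n; ++i) b[i] /= norm;

    float dist = 0.0f;
    for (int i = 0; i < n; ++i) dist += (v[i] - b[i]) * (v[i] - b[i]);
    v = b; // update the eigenvector estimate
    return std::sqrt(dist);
}

int main() {
    // tiny symmetric test matrix; dominant eigenvector is ~[0.707, 0.707]
    const std::vector<float> A = { 2.0f, 1.0f,
                                   1.0f, 2.0f };
    std::vector<float> v = { 1.0f, 0.0f };
    for (int iter = 0; iter < 1000; ++iter) {
        if (piter_step(A, v, 2) < 1e-7f) break; // converged
    }
    printf("dominant eigenvector: [%f, %f]\n", v[0], v[1]);
    return 0;
}

Iterating b = A*v / ||A*v|| drives v toward the dominant eigenvector of A; with A set to the square matrix built from v_diff_original, that eigenvector is the per-layer principal direction that run_pca exports as the control vector.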