clean up PCA ggml implementation
This commit is contained in: parent a710df749c · commit c241b500a1
2 changed files with 93 additions and 109 deletions
@@ -94,7 +94,7 @@ struct callback_data {
         auto diff_filtered = filter_nonzero_rows(v_pos[il]);
         v_diff_filtered.push_back(diff_filtered);
     }
-    return v_pos; // for convenience, we return the result std::vector
+    return v_diff_filtered; // for convenience, we return the result std::vector
 }
 
 // delete zero rows from a given 2D tensor
@@ -624,7 +624,7 @@ int main(int argc, char ** argv) {
     ctx_train.build_v_diff();
 
     // run PCA
-    pca(ctx_train.v_diff, ctx_train.v_final);
+    PCA::run_pca(ctx_train.v_diff, ctx_train.v_final);
 
     // write output vectors to gguf
     export_gguf(ctx_train.v_final, cparams.outfile, model_hint);
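Aside (not part of the diff): the hunk above is the only call-site change — the free function pca(...) is now namespaced as PCA::run_pca(...). Restating the contract the surrounding code implies, with my own comments:

    // v_diff:  one F32 tensor of shape [n_embd, n_samples] per layer
    // v_final: one preallocated F32 tensor of shape [n_embd] per layer,
    //          which receives the dominant eigenvector for that layer
    PCA::run_pca(ctx_train.v_diff, ctx_train.v_final);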
@@ -29,12 +29,11 @@ static void print_debug_tensor(struct ggml_tensor * t) {
     printf(" ... ]\n");
 }
 
+namespace PCA {
+
 struct pca_model {
     struct ggml_tensor * v_diff_original;
     struct ggml_tensor * square;
-    struct ggml_tensor * square_transpose;
     struct ggml_tensor * eigenvector;
 
     ggml_backend_t backend = NULL;
@@ -42,7 +41,7 @@ struct pca_model {
     struct ggml_context * ctx;
 };
 
-void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
+void load_pca_model(pca_model & model, struct ggml_tensor * input) {
 #ifdef GGML_USE_CUDA
     fprintf(stderr, "%s: using CUDA backend\n", __func__);
     model.backend = ggml_backend_cuda_init(0); // init device 0
@@ -64,35 +63,35 @@ void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
     if (!model.backend) {
         model.backend = ggml_backend_cpu_init();
     }
 
     //printf("v_diff_original[0][%d]: %f\n", DEBUG_POS, ggml_get_f32_nd(v_diff_original, 0, DEBUG_POS, 0, 0));
 
     const int num_tensors = 4;
 
     struct ggml_init_params params {
-        /*.mem_size =*/ ggml_tensor_overhead() * num_tensors,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc =*/ true,
+        /*.mem_size   =*/ ggml_tensor_overhead() * num_tensors,
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ true,
     };
 
     model.ctx = ggml_init(params);
 
-    model.v_diff_original = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[0], v_diff_original->ne[1]);
-    model.square = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1], v_diff_original->ne[1]);
-    model.square_transpose = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1], v_diff_original->ne[1]);
-    model.eigenvector = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, v_diff_original->ne[1]);
+    auto n_embd = input->ne[0];
+    auto n_samples = input->ne[1];
+
+    model.v_diff_original = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_embd, n_samples);
+    model.square = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_embd, n_embd);
+    model.eigenvector = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, n_embd);
 
     ggml_set_name(model.v_diff_original, "v_diff_original");
     ggml_set_name(model.square, "square");
     ggml_set_name(model.eigenvector, "eigenvector");
 
     model.buffer = ggml_backend_alloc_ctx_tensors(model.ctx, model.backend);
 
-    ggml_backend_tensor_set(model.v_diff_original, v_diff_original->data, 0, ggml_nbytes(v_diff_original));
-
-    // no need to load anything into square or square_transpose yet
+    ggml_backend_tensor_set(model.v_diff_original, input->data, 0, ggml_nbytes(input));
 
     // initialize model.eigenvector to random vector
     std::vector<float> random_vec;
     std::default_random_engine generator(static_cast<unsigned int>(std::time(0)));
     std::uniform_real_distribution<float> distribution(0.0, 1.0);
-    for (int i = 0; i < v_diff_original->ne[1]; ++i) {
+    for (int i = 0; i < ggml_nelements(model.eigenvector); ++i) {
         random_vec.push_back(distribution(generator));
     }
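Aside (not part of the diff): the uniform random initialization above is the standard starting point for power iteration. Writing the n_embd × n_embd matrix as $A$ with eigenpairs $(\lambda_i, u_i)$ ordered $|\lambda_1| > |\lambda_2| \ge \dots$, a random start vector expands as

    b_0 = \sum_i c_i u_i, \qquad A^k b_0 = \sum_i c_i \lambda_i^k u_i \approx c_1 \lambda_1^k u_1 \quad (k \gg 1),

and $c_1 \ne 0$ holds with probability 1 for a random $b_0$, so the normalized iterates converge to the dominant eigenvector — the first principal component of the diff data.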
@@ -100,8 +99,12 @@ void load_pca_model(pca_model & model, struct ggml_tensor * v_diff_original) {
     ggml_backend_tensor_set(model.eigenvector, random_vec.data(), 0, ggml_nbytes(model.eigenvector));
 }
 
-struct ggml_cgraph * square_diff_graph(const pca_model & model) {
-    static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
+static struct ggml_cgraph * build_graph_piter(
+        const pca_model & model,
+        bool calc_square = false,
+        int nb_iterations = 1) {
+    GGML_ASSERT(nb_iterations > 0);
+    static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
     static std::vector<uint8_t> buf(buf_size);
 
     struct ggml_init_params params0 = {
@@ -109,21 +112,55 @@ struct ggml_cgraph * square_diff_graph(const pca_model & model) {
         /*.mem_buffer =*/ buf.data(),
         /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_allocr_alloc_graph()
     };
+    // create a temporary context to build the graph
     struct ggml_context * ctx0 = ggml_init(params0);
     struct ggml_cgraph * gf = ggml_new_graph(ctx0);
 
-    struct ggml_tensor * square = ggml_mul_mat(ctx0, model.v_diff_original, model.v_diff_original);
-    //struct ggml_tensor * square_transpose = ggml_transpose(ctx0, square);
+    // turn v_diff_original into square matrix if needed
+    if (calc_square) {
+        //struct ggml_tensor * v_diff_transposed = ggml_transpose(ctx0, model.v_diff_original);
+        struct ggml_tensor * square = ggml_mul_mat(ctx0, model.v_diff_original, model.v_diff_original);
+        ggml_set_name(square, "square");
+        //model.square = ggml_scale_inplace(ctx0, model.square, 0.0);
+    }
 
-    ggml_build_forward_expand(gf, square);
+    struct ggml_tensor * b_tensor;
+
+    for (int i = 0; i < nb_iterations; ++i) {
+        // b_tensor = square * eigenvector^T
+        b_tensor = ggml_mul_mat(ctx0, model.square, model.eigenvector);
+        ggml_set_name(b_tensor, "b_tensor");
+
+        // normalize
+        b_tensor = ggml_div_inplace(ctx0,
+            b_tensor,
+            ggml_sqrt_inplace(ctx0, ggml_sum_rows(ctx0, ggml_sqr(ctx0, b_tensor)))
+        );
+    }
+
+    // calculate distance
+    struct ggml_tensor * distance;
+    {
+        distance = ggml_sub(ctx0, model.eigenvector, b_tensor);
+        ggml_set_name(distance, "distance");
+        distance = ggml_sqrt_inplace(ctx0,
+            ggml_sum_rows(ctx0, ggml_sqr_inplace(ctx0, distance)));
+    }
+
+    // build operations nodes
+    ggml_build_forward_expand(gf, distance);
 
+    // delete the temporary context used to build the graph
     ggml_free(ctx0);
     return gf;
 }
 
-struct ggml_tensor * compute_square(const pca_model & model, ggml_gallocr_t allocr, int n_threads) {
-    struct ggml_cgraph * gf = square_diff_graph(model);
-
+struct ggml_tensor * compute_piter(
+        const pca_model & model,
+        struct ggml_cgraph * gf,
+        ggml_gallocr_t allocr,
+        int n_threads) {
+    // allocate tensors
     ggml_gallocr_alloc_graph(allocr, gf);
 
     if (ggml_backend_is_cpu(model.backend)) {
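Aside (my reading of the graph ops above, not text from the commit): each unrolled iteration is the textbook power-iteration update, and the final node measures how far the eigenvector estimate moved:

    b_{k+1} = \frac{A\, b_k}{\lVert A\, b_k \rVert_2}, \qquad d = \lVert b_k - b_{k+1} \rVert_2

Here ggml_mul_mat produces $A b_k$, the ggml_sqr → ggml_sum_rows → ggml_sqrt_inplace chain forms the L2 norm, ggml_div_inplace normalizes, and the distance $d$ is the last node of the graph — which is what compute_piter returns.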
@@ -138,68 +175,26 @@ struct ggml_tensor * compute_square(const pca_model & model, ggml_gallocr_t allocr, int n_threads) {
 
     ggml_backend_graph_compute(model.backend, gf);
 
     // in this case, the output tensor is the last one in the graph
     return gf->nodes[gf->n_nodes - 1];
 }
 
-struct ggml_cgraph * power_iteration_graph(const pca_model & model, float tolerance) {
-    static size_t buf_size = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead();
-    static std::vector<uint8_t> buf(buf_size);
-
-    struct ggml_init_params params0 = {
-        /*.mem_size =*/ buf_size,
-        /*.mem_buffer =*/ buf.data(),
-        /*.no_alloc =*/ true, // the tensors will be allocated later by ggml_allocr_alloc_graph()
-    };
-    struct ggml_context * ctx0 = ggml_init(params0);
-    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
-
-    struct ggml_tensor * b_tensor = ggml_mul_mat(ctx0, model.square, model.eigenvector);
-    // TODO difference between ggml_norm and ggml_norm_inplace?
-    // also is this the right way to do multi-step graphs?
-    b_tensor = ggml_norm_inplace(ctx0, b_tensor, tolerance);
-
-    ggml_build_forward_expand(gf, b_tensor);
-
-    ggml_free(ctx0);
-    return gf;
-}
-
-struct ggml_tensor * compute_piter(const pca_model & model, ggml_gallocr_t allocr, int n_threads, float tolerance) {
-    struct ggml_cgraph * gf = power_iteration_graph(model, tolerance);
-
-    ggml_gallocr_alloc_graph(allocr, gf);
-
-    if (ggml_backend_is_cpu(model.backend)) {
-        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
-    }
-
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(model.backend)) {
-        ggml_backend_metal_set_n_cb(model.backend, n_threads);
-    }
-#endif
-
-    ggml_backend_graph_compute(model.backend, gf);
-
-    return gf->nodes[gf->n_nodes - 1];
-}
-
-static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * output, int n_threads, int maxIterations = 1000, float tolerance = 1e-7) {
+static void power_iteration(
+        struct ggml_tensor * input,
+        struct ggml_tensor * output,
+        int n_threads,
+        int maxIterations = 1000,
+        float tolerance = 1e-7) {
     printf("in power iteration\n");
-    int n_embd = input->ne[0];// shape of input: [n_embd, m]
+    int n_embd = input->ne[0]; // shape of input: [n_embd, m]
 
     pca_model model;
     load_pca_model(model, input);
 
-    ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
-
-    struct ggml_tensor * square = compute_square(model, allocr, n_threads);
-    ggml_backend_tensor_set(model.square, square->data, 0, ggml_nbytes(model.square));
-
-    ggml_gallocr_free(allocr);
+    ggml_gallocr_t allocr = NULL;
 
     struct ggml_init_params host_params = {
-        /*.mem_size =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 2u,
+        /*.mem_size =*/ (n_embd * sizeof(float) + ggml_tensor_overhead()) * 4u,
         /*.mem_buffer =*/ NULL,
         /*.no_alloc =*/ false,
     };
@@ -209,33 +204,19 @@ static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * output, int n_threads, int maxIterations = 1000, float tolerance = 1e-7) {
     struct ggml_tensor * host_new_eigenvector = ggml_new_tensor_1d(host_ctx, GGML_TYPE_F32, n_embd);
 
     for (int iter = 0; iter < maxIterations; ++iter) {
+        // TODO do I need to reset it like this every time?
+        if (allocr) {
+            ggml_gallocr_free(allocr);
+        }
+        allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend));
+        struct ggml_cgraph * gf = build_graph_piter(model, iter == 0);
+        printf("kkk\n");
+        ggml_graph_dump_dot(gf, nullptr, "/tmp/_cgraph.dot");
+        struct ggml_tensor * distance = compute_piter(model, gf, allocr, n_threads);
 
-        struct ggml_tensor * b_tensor = compute_piter(model, allocr, n_threads, tolerance);
-
-        ggml_backend_tensor_get(b_tensor, host_new_eigenvector->data, 0, ggml_nbytes(b_tensor));
-        ggml_backend_tensor_get(model.eigenvector, host_old_eigenvector->data, 0, ggml_nbytes(model.eigenvector));
-
-        // convergence check
-        float diff = 0.0;
-        for (int i = 0; i < n_embd; ++i) {
-            diff += std::pow((ggml_get_f32_1d(host_new_eigenvector, i) - ggml_get_f32_1d(host_old_eigenvector, i)), 2);
-        }
-
-        // update eigenvector
-        ggml_backend_tensor_set(model.eigenvector, host_new_eigenvector->data, 0, ggml_nbytes(model.eigenvector));
-
-        try {
-            if (std::sqrt(diff) < tolerance) {
-                break;
-            }
-        }
-        catch (std::exception & e) {
-            // catch division by zero I guess
-            break;
-        }
+        ggml_backend_tensor_get(distance, host_new_eigenvector->data, 0, ggml_nbytes(distance));
+        print_debug_tensor(host_new_eigenvector);
+
+        break; // FIXME
     }
 
     ggml_backend_tensor_get(model.eigenvector, output->data, 0, ggml_nbytes(model.eigenvector));
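Aside (not part of the diff): with the break; // FIXME above, the loop currently runs a single graph evaluation and the old host-side convergence test is gone for now. For reference, a minimal CPU-only sketch of the procedure this loop is converging toward — plain C++ with no ggml; the name power_iteration_ref and the toy 2×2 matrix are mine, not the commit's:

    #include <cmath>
    #include <cstdio>
    #include <random>
    #include <vector>

    // Reference power iteration on a dense n x n matrix A (row-major).
    // Returns the approximate dominant eigenvector once the update moves
    // less than `tolerance`, mirroring the convergence check the ggml
    // version is meant to perform.
    static std::vector<float> power_iteration_ref(
            const std::vector<float> & A, int n,
            int max_iterations = 1000, float tolerance = 1e-7f) {
        std::default_random_engine gen(1234);
        std::uniform_real_distribution<float> dist(0.0f, 1.0f);
        std::vector<float> b(n), b_new(n);
        for (auto & x : b) x = dist(gen);

        for (int iter = 0; iter < max_iterations; ++iter) {
            // b_new = A * b
            for (int i = 0; i < n; ++i) {
                float acc = 0.0f;
                for (int j = 0; j < n; ++j) acc += A[i * n + j] * b[j];
                b_new[i] = acc;
            }
            // normalize: b_new /= ||b_new||
            float norm = 0.0f;
            for (float x : b_new) norm += x * x;
            norm = std::sqrt(norm);
            for (float & x : b_new) x /= norm;
            // squared distance between successive iterates
            float diff = 0.0f;
            for (int i = 0; i < n; ++i) {
                float d = b[i] - b_new[i];
                diff += d * d;
            }
            b = b_new;
            if (std::sqrt(diff) < tolerance) break;
        }
        return b;
    }

    int main() {
        // 2x2 example whose dominant eigenvector lies along (1, 1)
        std::vector<float> A = { 2.0f, 1.0f,
                                 1.0f, 2.0f };
        auto v = power_iteration_ref(A, 2);
        printf("eigenvector: [%f, %f]\n", v[0], v[1]);
    }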
@@ -245,11 +226,12 @@ static void power_iteration(struct ggml_tensor * input, struct ggml_tensor * output, int n_threads, int maxIterations = 1000, float tolerance = 1e-7) {
     ggml_free(model.ctx);
     ggml_backend_buffer_free(model.buffer);
     ggml_backend_free(model.backend);
+    exit(0);
 }
 
-static void pca(
-    const std::vector<struct ggml_tensor *> & v_input,
-    const std::vector<struct ggml_tensor *> & v_output) {
+static void run_pca(
+        const std::vector<struct ggml_tensor *> & v_input,
+        const std::vector<struct ggml_tensor *> & v_output) {
     printf("Running PCA...\n");
     int n_embd = v_input[0]->ne[0]; // shape of v_input[0]: [n_embd, m]
     int n_threads = 8; // TODO: change me
@@ -265,3 +247,5 @@ static void pca(
     }
     printf("Done with PCA.\n");
 }
+
+}