use ggml_format_name

This commit is contained in:
ngxson 2024-06-11 19:14:04 +02:00
parent 54f77e2467
commit 04c91d29ff
2 changed files with 14 additions and 10 deletions

View file

@@ -127,7 +127,7 @@ struct callback_data {
// diff_filtered: [n_embd, n_nonzero_rows]
struct ggml_tensor * diff_filtered = ggml_new_tensor_2d(
ctx_ggml, GGML_TYPE_F32, n_embd, n_nonzero_rows);
ggml_set_name(diff_filtered, (std::string("diff_filtered_") + a->name).c_str());
ggml_format_name(diff_filtered, "diff_filtered_%s", a->name);
diff_filtered->data = malloc(ggml_nbytes(diff_filtered));
// copy non-zero rows
@@ -245,7 +245,7 @@ struct train_context {
struct ctrl_params {
/* default meta parameters */
int n_completions = INT_MAX;
int n_completions = 64;
int n_pca_batch = 20;
int n_pca_iterations = 1000;
@@ -311,7 +311,7 @@ static void print_usage(const char * executable) {
printf(" -cf, --completions-file completions file\n");
printf(" default: %s\n", defaults.completions_file.c_str());
printf(" -nc, --num-completions N number of lines of completions file to use\n");
printf(" default: use all lines\n");
printf(" default: %d\n", defaults.n_completions);
printf(" --batch-pca N batch size used for PCA. Larger batch runs faster, but uses more memory\n");
printf(" default: %d\n", defaults.n_pca_batch);
printf(" --iter-pca N number of iterations used for PCA\n");
@@ -550,6 +550,11 @@ int main(int argc, char ** argv) {
return 1;
}
if (cparams.n_pca_iterations % cparams.n_pca_batch != 0) {
fprintf(stderr, "PCA iterations must by multiply of PCA batch size\n");
return 1;
}
// load and prepare entries for training
prepare_entries(cparams);

View file

@@ -181,13 +181,14 @@ static struct ggml_cgraph * build_graph_piter(
b_tensor,
ggml_sqrt_inplace(ctx0, ggml_sum_rows(ctx0, ggml_sqr(ctx0, b_tensor)))
);
ggml_set_name(b_tensor, ("b_tensor_norm_" + std::to_string(i)).c_str());
ggml_format_name(b_tensor, "b_tensor_norm_%d", i);
// calculate distance(new eigenvector - old eigenvector)
// we don't use ggml_sub because it may not be implemented on GPU backend
struct ggml_tensor * new_sub_old = ggml_add(ctx0, old_eigen, ggml_scale(ctx0, b_tensor, -1));
distance = ggml_sqrt_inplace(ctx0,
ggml_sum_rows(ctx0, ggml_sqr_inplace(ctx0, new_sub_old)));
ggml_set_name(distance, ("distance_" + std::to_string(i)).c_str());
ggml_format_name(distance, "distance_%d", i);
old_eigen = b_tensor;
@@ -317,22 +318,20 @@ static void run_pca(
struct pca_params & params,
const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_samples, n_embd]
const std::vector<struct ggml_tensor *> & v_output) {
printf("Running PCA...\n");
printf("%s: Running PCA...\n", __func__);
for (size_t il = 0; il < v_input.size(); ++il) {
// prepare output vector
struct ggml_tensor * ctrl_out = v_output[il];
auto name = std::string("direction.") + std::to_string(il + 1);
ggml_set_name(ctrl_out, name.c_str());
ggml_format_name(ctrl_out, "direction.%ld", il+1);
// run power_iteration
params.i_layer = il;
params.n_layers = v_input.size();
power_iteration(params, v_input[il], ctrl_out);
printf("DONE layer %ld / %ld\n", il+1, v_input.size());
printf("%s: Done layer %ld / %ld\n", __func__, il+1, v_input.size());
//print_debug_tensor(ctrl_out);
}
printf("Done with PCA.\n");
}
}