From 4571bcc17fec0a75b6a3833b083462c87b497600 Mon Sep 17 00:00:00 2001 From: Andrew Godfrey Date: Wed, 15 Nov 2023 08:05:40 -0800 Subject: [PATCH] Use ggml_set_zero instead of adding a new function --- common/train.cpp | 45 ---------------------------------- common/train.h | 1 - examples/finetune/finetune.cpp | 24 +++++++++--------- 3 files changed, 12 insertions(+), 58 deletions(-) diff --git a/common/train.cpp b/common/train.cpp index 62aaa2638..bc15b7a03 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -68,51 +68,6 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd) free(rnd); } -struct ggml_tensor * zero_tensor(struct ggml_tensor * tensor) { - float scale = 1.0f; // xavier - switch (tensor->n_dims) { - case 1: - for (int i0 = 0; i0 < tensor->ne[0]; i0++) { - float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]); - *dst = 0.0f; - } - break; - case 2: - for (int i1 = 0; i1 < tensor->ne[1]; i1++) { - for (int i0 = 0; i0 < tensor->ne[0]; i0++) { - float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]); - *dst = 0.0f; - } - } - break; - case 3: - for (int i2 = 0; i2 < tensor->ne[2]; i2++) { - for (int i1 = 0; i1 < tensor->ne[1]; i1++) { - for (int i0 = 0; i0 < tensor->ne[0]; i0++) { - float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]); - *dst = 0.0f; - } - } - } - break; - case 4: - for (int i3 = 0; i3 < tensor->ne[3]; i3++) { - for (int i2 = 0; i2 < tensor->ne[2]; i2++) { - for (int i1 = 0; i1 < tensor->ne[1]; i1++) { - for (int i0 = 0; i0 < tensor->ne[0]; i0++) { - float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3]); - *dst = 0.0f; - } - } - } - } - break; - default: - die("Unsupported tensor->n_dims"); - }; - return tensor; -} - struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) { float scale = 1.0f; // xavier switch (tensor->n_dims) { diff --git a/common/train.h b/common/train.h index e1758ddf2..d86c93cc4 100644 --- a/common/train.h +++ b/common/train.h @@ -127,7 +127,6 @@ struct random_uniform_distribution * init_random_uniform_distribution(int seed, void free_random_normal_distribution (struct random_normal_distribution * rnd); void free_random_uniform_distribution(struct random_uniform_distribution * rnd); -struct ggml_tensor * zero_tensor (struct ggml_tensor * tensor); struct ggml_tensor * randomize_tensor_normal (struct ggml_tensor * tensor, struct random_normal_distribution * rnd); struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd); diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 04a7a986b..d6e75fc37 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -548,35 +548,35 @@ static void randomize_lora(struct my_llama_lora * lora, int seed, float mean, fl struct random_normal_distribution * rnd = init_random_normal_distribution(seed, mean, std, min, max); randomize_tensor_normal(lora->tok_embeddings_a, rnd); - zero_tensor(lora->tok_embeddings_b); + ggml_set_zero(lora->tok_embeddings_b); randomize_tensor_normal(lora->norm_a, rnd); - zero_tensor(lora->norm_b); + ggml_set_zero(lora->norm_b); randomize_tensor_normal(lora->output_a, rnd); - zero_tensor(lora->output_b); + ggml_set_zero(lora->output_b); for (uint32_t i = 0; i < n_layer; ++i) { auto & layer = lora->layers[i]; randomize_tensor_normal(layer.attention_norm_a, rnd); - zero_tensor(layer.attention_norm_b); + ggml_set_zero(layer.attention_norm_b); randomize_tensor_normal(layer.wq_a, rnd); - zero_tensor(layer.wq_b); + ggml_set_zero(layer.wq_b); randomize_tensor_normal(layer.wk_a, rnd); - zero_tensor(layer.wk_b); + ggml_set_zero(layer.wk_b); randomize_tensor_normal(layer.wv_a, rnd); - zero_tensor(layer.wv_b); + ggml_set_zero(layer.wv_b); randomize_tensor_normal(layer.wo_a, rnd); - zero_tensor(layer.wo_b); + ggml_set_zero(layer.wo_b); randomize_tensor_normal(layer.ffn_norm_a, rnd); - zero_tensor(layer.ffn_norm_b); + ggml_set_zero(layer.ffn_norm_b); randomize_tensor_normal(layer.w1_a, rnd); - zero_tensor(layer.w1_b); + ggml_set_zero(layer.w1_b); randomize_tensor_normal(layer.w2_a, rnd); - zero_tensor(layer.w2_b); + ggml_set_zero(layer.w2_b); randomize_tensor_normal(layer.w3_a, rnd); - zero_tensor(layer.w3_b); + ggml_set_zero(layer.w3_b); } free_random_normal_distribution(rnd);