Use ggml_set_zero instead of adding a new function
This commit is contained in:
parent
c72c1b37de
commit
4571bcc17f
3 changed files with 12 additions and 58 deletions
|
@ -68,51 +68,6 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd)
|
||||||
free(rnd);
|
free(rnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * zero_tensor(struct ggml_tensor * tensor) {
|
|
||||||
float scale = 1.0f; // xavier
|
|
||||||
switch (tensor->n_dims) {
|
|
||||||
case 1:
|
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
|
||||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
|
|
||||||
*dst = 0.0f;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
|
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
|
||||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
|
|
||||||
*dst = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
|
|
||||||
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
|
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
|
||||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]);
|
|
||||||
*dst = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
for (int i3 = 0; i3 < tensor->ne[3]; i3++) {
|
|
||||||
for (int i2 = 0; i2 < tensor->ne[2]; i2++) {
|
|
||||||
for (int i1 = 0; i1 < tensor->ne[1]; i1++) {
|
|
||||||
for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
|
|
||||||
float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3]);
|
|
||||||
*dst = 0.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
die("Unsupported tensor->n_dims");
|
|
||||||
};
|
|
||||||
return tensor;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
|
struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
|
||||||
float scale = 1.0f; // xavier
|
float scale = 1.0f; // xavier
|
||||||
switch (tensor->n_dims) {
|
switch (tensor->n_dims) {
|
||||||
|
|
|
@ -127,7 +127,6 @@ struct random_uniform_distribution * init_random_uniform_distribution(int seed,
|
||||||
void free_random_normal_distribution (struct random_normal_distribution * rnd);
|
void free_random_normal_distribution (struct random_normal_distribution * rnd);
|
||||||
void free_random_uniform_distribution(struct random_uniform_distribution * rnd);
|
void free_random_uniform_distribution(struct random_uniform_distribution * rnd);
|
||||||
|
|
||||||
struct ggml_tensor * zero_tensor (struct ggml_tensor * tensor);
|
|
||||||
struct ggml_tensor * randomize_tensor_normal (struct ggml_tensor * tensor, struct random_normal_distribution * rnd);
|
struct ggml_tensor * randomize_tensor_normal (struct ggml_tensor * tensor, struct random_normal_distribution * rnd);
|
||||||
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd);
|
struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd);
|
||||||
|
|
||||||
|
|
|
@ -548,35 +548,35 @@ static void randomize_lora(struct my_llama_lora * lora, int seed, float mean, fl
|
||||||
struct random_normal_distribution * rnd = init_random_normal_distribution(seed, mean, std, min, max);
|
struct random_normal_distribution * rnd = init_random_normal_distribution(seed, mean, std, min, max);
|
||||||
|
|
||||||
randomize_tensor_normal(lora->tok_embeddings_a, rnd);
|
randomize_tensor_normal(lora->tok_embeddings_a, rnd);
|
||||||
zero_tensor(lora->tok_embeddings_b);
|
ggml_set_zero(lora->tok_embeddings_b);
|
||||||
randomize_tensor_normal(lora->norm_a, rnd);
|
randomize_tensor_normal(lora->norm_a, rnd);
|
||||||
zero_tensor(lora->norm_b);
|
ggml_set_zero(lora->norm_b);
|
||||||
randomize_tensor_normal(lora->output_a, rnd);
|
randomize_tensor_normal(lora->output_a, rnd);
|
||||||
zero_tensor(lora->output_b);
|
ggml_set_zero(lora->output_b);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < n_layer; ++i) {
|
for (uint32_t i = 0; i < n_layer; ++i) {
|
||||||
auto & layer = lora->layers[i];
|
auto & layer = lora->layers[i];
|
||||||
randomize_tensor_normal(layer.attention_norm_a, rnd);
|
randomize_tensor_normal(layer.attention_norm_a, rnd);
|
||||||
zero_tensor(layer.attention_norm_b);
|
ggml_set_zero(layer.attention_norm_b);
|
||||||
|
|
||||||
randomize_tensor_normal(layer.wq_a, rnd);
|
randomize_tensor_normal(layer.wq_a, rnd);
|
||||||
zero_tensor(layer.wq_b);
|
ggml_set_zero(layer.wq_b);
|
||||||
randomize_tensor_normal(layer.wk_a, rnd);
|
randomize_tensor_normal(layer.wk_a, rnd);
|
||||||
zero_tensor(layer.wk_b);
|
ggml_set_zero(layer.wk_b);
|
||||||
randomize_tensor_normal(layer.wv_a, rnd);
|
randomize_tensor_normal(layer.wv_a, rnd);
|
||||||
zero_tensor(layer.wv_b);
|
ggml_set_zero(layer.wv_b);
|
||||||
randomize_tensor_normal(layer.wo_a, rnd);
|
randomize_tensor_normal(layer.wo_a, rnd);
|
||||||
zero_tensor(layer.wo_b);
|
ggml_set_zero(layer.wo_b);
|
||||||
|
|
||||||
randomize_tensor_normal(layer.ffn_norm_a, rnd);
|
randomize_tensor_normal(layer.ffn_norm_a, rnd);
|
||||||
zero_tensor(layer.ffn_norm_b);
|
ggml_set_zero(layer.ffn_norm_b);
|
||||||
|
|
||||||
randomize_tensor_normal(layer.w1_a, rnd);
|
randomize_tensor_normal(layer.w1_a, rnd);
|
||||||
zero_tensor(layer.w1_b);
|
ggml_set_zero(layer.w1_b);
|
||||||
randomize_tensor_normal(layer.w2_a, rnd);
|
randomize_tensor_normal(layer.w2_a, rnd);
|
||||||
zero_tensor(layer.w2_b);
|
ggml_set_zero(layer.w2_b);
|
||||||
randomize_tensor_normal(layer.w3_a, rnd);
|
randomize_tensor_normal(layer.w3_a, rnd);
|
||||||
zero_tensor(layer.w3_b);
|
ggml_set_zero(layer.w3_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
free_random_normal_distribution(rnd);
|
free_random_normal_distribution(rnd);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue