In the train function, replace add_inplace with regular add

Using add_inplace seems to result in different gradients.
This commit is contained in:
xaedes 2023-08-14 17:49:22 +02:00
parent 2bf422eafd
commit fc826c8ea8
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@@ -1264,7 +1264,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
assert_shape_2d(cur, n_embd, N*n_batch);
}
-struct ggml_tensor * inpFF = ggml_add_inplace(ctx0, cur, inpSA);
+// struct ggml_tensor * inpFF = ggml_add_inplace(ctx0, cur, inpSA);
+struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA);
assert_shape_2d(inpFF, n_embd, N*n_batch);
// feed-forward network
@@ -1304,7 +1305,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
assert_shape_2d(cur, n_embd, N*n_batch);
}
-cur = ggml_add_inplace(ctx0, cur, inpFF);
+// cur = ggml_add_inplace(ctx0, cur, inpFF);
+cur = ggml_add(ctx0, cur, inpFF);
assert_shape_2d(cur, n_embd, N*n_batch);
// input for next layer