In the train function, replace ggml_add_inplace with regular ggml_add,
because using ggml_add_inplace seems to result in different gradients.
This commit is contained in:
parent
2bf422eafd
commit
fc826c8ea8
1 changed file with 4 additions and 2 deletions
|
@@ -1264,7 +1264,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
|
||||||
assert_shape_2d(cur, n_embd, N*n_batch);
|
assert_shape_2d(cur, n_embd, N*n_batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * inpFF = ggml_add_inplace(ctx0, cur, inpSA);
|
// struct ggml_tensor * inpFF = ggml_add_inplace(ctx0, cur, inpSA);
|
||||||
|
struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA);
|
||||||
assert_shape_2d(inpFF, n_embd, N*n_batch);
|
assert_shape_2d(inpFF, n_embd, N*n_batch);
|
||||||
|
|
||||||
// feed-forward network
|
// feed-forward network
|
||||||
|
@@ -1304,7 +1305,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
|
||||||
assert_shape_2d(cur, n_embd, N*n_batch);
|
assert_shape_2d(cur, n_embd, N*n_batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
cur = ggml_add_inplace(ctx0, cur, inpFF);
|
// cur = ggml_add_inplace(ctx0, cur, inpFF);
|
||||||
|
cur = ggml_add(ctx0, cur, inpFF);
|
||||||
assert_shape_2d(cur, n_embd, N*n_batch);
|
assert_shape_2d(cur, n_embd, N*n_batch);
|
||||||
|
|
||||||
// input for next layer
|
// input for next layer
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue