Fix baby-llama and test-grad0: pass an explicit eps argument to ggml_rms_norm
This commit is contained in:
parent
24e53a1466
commit
8d7cfb42b7
2 changed files with 12 additions and 10 deletions
|
@@ -8,6 +8,8 @@
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static const float rms_norm_eps = 1e-6f;
|
||||||
|
|
||||||
float frand() {
|
float frand() {
|
||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
}
|
}
|
||||||
|
@@ -562,7 +564,7 @@ struct ggml_tensor * forward(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpL);
|
cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
|
|
||||||
// cur = attention_norm*cur
|
// cur = attention_norm*cur
|
||||||
cur = ggml_mul(ctx0,
|
cur = ggml_mul(ctx0,
|
||||||
|
@@ -685,7 +687,7 @@ struct ggml_tensor * forward(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpFF);
|
cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
|
||||||
|
|
||||||
// cur = ffn_norm*cur
|
// cur = ffn_norm*cur
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
|
@@ -729,7 +731,7 @@ struct ggml_tensor * forward(
|
||||||
{
|
{
|
||||||
|
|
||||||
// inpL shape [n_embd,N,1,1]
|
// inpL shape [n_embd,N,1,1]
|
||||||
inpL = ggml_rms_norm(ctx0, inpL);
|
inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
|
|
||||||
// inpL = norm*inpL
|
// inpL = norm*inpL
|
||||||
// inpL shape [n_embd,N,1,1]
|
// inpL shape [n_embd,N,1,1]
|
||||||
|
@@ -817,7 +819,7 @@ struct ggml_tensor * forward_batch(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N*n_batch,1,1]
|
// cur shape [n_embd,N*n_batch,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpL);
|
cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
assert_shape_2d(cur, n_embd, N*n_batch);
|
assert_shape_2d(cur, n_embd, N*n_batch);
|
||||||
|
|
||||||
// cur = attention_norm*cur
|
// cur = attention_norm*cur
|
||||||
|
@@ -981,7 +983,7 @@ struct ggml_tensor * forward_batch(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N*n_batch,1,1]
|
// cur shape [n_embd,N*n_batch,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpFF);
|
cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
|
||||||
assert_shape_2d(cur, n_embd, N*n_batch);
|
assert_shape_2d(cur, n_embd, N*n_batch);
|
||||||
|
|
||||||
// cur = ffn_norm*cur
|
// cur = ffn_norm*cur
|
||||||
|
@@ -1034,7 +1036,7 @@ struct ggml_tensor * forward_batch(
|
||||||
{
|
{
|
||||||
|
|
||||||
// inpL shape [n_embd,N*n_batch,1,1]
|
// inpL shape [n_embd,N*n_batch,1,1]
|
||||||
inpL = ggml_rms_norm(ctx0, inpL);
|
inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
assert_shape_2d(inpL, n_embd, N*n_batch);
|
assert_shape_2d(inpL, n_embd, N*n_batch);
|
||||||
|
|
||||||
// inpL = norm*inpL
|
// inpL = norm*inpL
|
||||||
|
@@ -1104,7 +1106,7 @@ struct ggml_tensor * forward_lora(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpL);
|
cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
|
|
||||||
// cur = attention_norm*cur
|
// cur = attention_norm*cur
|
||||||
cur = ggml_mul(ctx0,
|
cur = ggml_mul(ctx0,
|
||||||
|
@@ -1251,7 +1253,7 @@ struct ggml_tensor * forward_lora(
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
cur = ggml_rms_norm(ctx0, inpFF);
|
cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
|
||||||
|
|
||||||
// cur = ffn_norm*cur
|
// cur = ffn_norm*cur
|
||||||
// cur shape [n_embd,N,1,1]
|
// cur shape [n_embd,N,1,1]
|
||||||
|
@@ -1295,7 +1297,7 @@ struct ggml_tensor * forward_lora(
|
||||||
{
|
{
|
||||||
|
|
||||||
// inpL shape [n_embd,N,1,1]
|
// inpL shape [n_embd,N,1,1]
|
||||||
inpL = ggml_rms_norm(ctx0, inpL);
|
inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
|
|
||||||
// inpL = norm*inpL
|
// inpL = norm*inpL
|
||||||
// inpL shape [n_embd,N,1,1]
|
// inpL shape [n_embd,N,1,1]
|
||||||
|
|
|
@@ -850,7 +850,7 @@ int main(int argc, const char ** argv) {
|
||||||
ggml_set_param(ctx0, x[i]);
|
ggml_set_param(ctx0, x[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));
|
struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
|
||||||
|
|
||||||
check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
|
check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue