change default AdamW weight decay parameter used in training to 0.1 as used in nanoGPT
parent a80f184e6d
commit f175ead6ef
1 changed file with 1 addition and 1 deletion
@@ -3416,7 +3416,7 @@ struct train_params get_default_train_params() {
     params.lbfgs_n_iter = 16;
     params.adam_n_iter  = 16;
     params.adam_alpha   = 1e-3f;
-    params.adam_decay   = 1e-3f;
+    params.adam_decay   = 1e-1f;
     params.adam_beta1   = 0.9f;
     params.adam_beta2   = 0.999f;
     params.adam_gclip   = 1.0f;
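
For reference, a minimal sketch (hand-rolled C, not the repository's ggml optimizer code) of how the decoupled AdamW weight decay enters the parameter update; the decay value of 1e-1f matches nanoGPT's default, and the other hyperparameters mirror the defaults above.

/* Minimal sketch of one decoupled AdamW step for a single float parameter.
 * Assumption: hand-rolled illustration only, not the code this commit touches. */
#include <math.h>
#include <stdio.h>

int main(void) {
    float alpha = 1e-3f;              /* learning rate (params.adam_alpha)      */
    float decay = 1e-1f;              /* weight decay, new default (was 1e-3f)  */
    float beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f;

    float w = 0.5f;                   /* parameter value                        */
    float g = 0.2f;                   /* gradient at this step                  */
    float m = 0.0f, v = 0.0f;         /* first and second moment estimates      */
    int   t = 1;                      /* step counter                           */

    /* Adam moment estimates with bias correction. */
    m = beta1 * m + (1.0f - beta1) * g;
    v = beta2 * v + (1.0f - beta2) * g * g;
    float mhat = m / (1.0f - powf(beta1, (float) t));
    float vhat = v / (1.0f - powf(beta2, (float) t));

    /* Decoupled weight decay: applied directly to the weight, scaled by the
     * learning rate, independent of the gradient-based Adam step. */
    w -= alpha * (mhat / (sqrtf(vhat) + eps) + decay * w);

    printf("updated weight: %f\n", w);
    return 0;
}

With decay at 1e-1f the shrinkage term alpha * decay * w is two orders of magnitude larger per step than with the old 1e-3f default, giving noticeably stronger regularization during training.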