change default AdamW weight decay parameter used in training to 0.1, as used in nanoGPT

Author: xaedes
Date:   2023-06-29 21:33:39 +02:00
parent  a80f184e6d
commit  f175ead6ef

@@ -3416,7 +3416,7 @@ struct train_params get_default_train_params() {
     params.lbfgs_n_iter = 16;
     params.adam_n_iter  = 16;
     params.adam_alpha   = 1e-3f;
-    params.adam_decay   = 1e-3f;
+    params.adam_decay   = 1e-1f;
     params.adam_beta1   = 0.9f;
     params.adam_beta2   = 0.999f;
     params.adam_gclip   = 1.0f;
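
For context, adam_decay is the decoupled weight-decay coefficient of AdamW (Loshchilov & Hutter). The following is a minimal single-parameter sketch, not the llama.cpp implementation; adamw_step is a hypothetical helper for illustration. It shows where the value raised here from 1e-3f to 1e-1f enters the update: the decay term scales the weight directly instead of being folded into the gradient as in classic L2 regularization.

#include <math.h>
#include <stdio.h>

/* Illustrative sketch of one AdamW step for a single parameter.
 * Not the llama.cpp code; adamw_step is a hypothetical helper. */
static void adamw_step(float *w, float g, float *m, float *v, int t,
                       float alpha, float beta1, float beta2,
                       float eps, float decay) {
    *m = beta1 * (*m) + (1.0f - beta1) * g;       /* first-moment EMA  */
    *v = beta2 * (*v) + (1.0f - beta2) * g * g;   /* second-moment EMA */
    float mhat = *m / (1.0f - powf(beta1, (float)t));  /* bias correction */
    float vhat = *v / (1.0f - powf(beta2, (float)t));
    /* decay corresponds to adam_decay: 1e-1f after this commit (was 1e-3f);
     * it is applied decoupled from the gradient-based step. */
    *w -= alpha * (mhat / (sqrtf(vhat) + eps) + decay * (*w));
}

int main(void) {
    float w = 0.5f, m = 0.0f, v = 0.0f;
    adamw_step(&w, /*g=*/0.2f, &m, &v, /*t=*/1,
               /*alpha=*/1e-3f, /*beta1=*/0.9f, /*beta2=*/0.999f,
               /*eps=*/1e-8f, /*decay=*/1e-1f);
    printf("w after one step: %f\n", w);
    return 0;
}

At the default alpha = 1e-3f, decay = 1e-1f multiplies each weight by roughly (1 - alpha * decay) = 0.9999 per step, so the old 1e-3f value made regularization about 100x weaker. nanoGPT likewise defaults to weight_decay = 1e-1 in its AdamW configuration, which is the behavior this commit matches.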