remove unused training parameters "use_scratch" and "use_unified"

This commit is contained in:
xaedes 2023-08-14 18:31:59 +02:00
parent c954f41ca4
commit 271e4d64b5
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

View file

@@ -2189,10 +2189,8 @@ struct train_params {
bool samples_start_after_nl;
bool use_adam;
bool use_flash;
bool use_scratch;
bool use_checkpointing;
bool use_alloc;
bool use_unified;
// only adam
int warmup;
@@ -2252,10 +2250,8 @@ struct train_params get_default_train_params() {
params.samples_start_after_nl = false;
params.use_adam = true;
params.use_flash = true;
params.use_scratch = true;
params.use_checkpointing = true;
params.use_alloc = true;
params.use_unified = true;
params.opt_past = 0;
params.opt_delta = 1e-5f;
@@ -2313,16 +2309,12 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * p
fprintf(stderr, " --samples-after-nl Training samples start after newlines. (default %s)\n", params->samples_start_after_nl ? "on" : "off");
fprintf(stderr, " --use-lbfgs Use LBFGS optimizer instead of default Adam\n");
fprintf(stderr, " --use-adam Use Adam optimizer (default)\n");
fprintf(stderr, " --no-flash Don't use flash attention. Implies no-scratch and no-checkpointing.\n");
fprintf(stderr, " --no-flash Don't use flash attention \n");
fprintf(stderr, " --use-flash Use flash attention (default)\n");
fprintf(stderr, " --no-scratch Don't use scratch buffers. Implies no-checkpointing.\n");
fprintf(stderr, " --use-scratch Use scratch buffers. Implies use-flash. (default)\n");
fprintf(stderr, " --no-checkpointing Don't use gradient checkpointing\n");
fprintf(stderr, " --use-checkpointing Use gradient checkpointing. Implies use-scratch and use-flash. (default)\n");
fprintf(stderr, " --use-checkpointing Use gradient checkpointing (default)\n");
fprintf(stderr, " --no-alloc Don't use allocator\n");
fprintf(stderr, " --use-alloc Use allocator. Implies use-unified. (default)\n");
fprintf(stderr, " --no-unified Don't use unified\n");
fprintf(stderr, " --use-unified Use unified. (default)\n");
fprintf(stderr, " --use-alloc Use allocator (default)\n");
fprintf(stderr, " --warmup N Only for Adam optimizer. Number of warmup steps (default %d)\n", params->warmup);
fprintf(stderr, " --cos-decay-steps N Only for Adam optimizer. Number of cosine decay steps (default %d)\n", params->cos_decay_steps);
fprintf(stderr, " --cos-decay-restart N Only for Adam optimizer. Increase of cosine decay steps after restart (default %f)\n", params->cos_decay_restart);
@@ -2480,10 +2472,6 @@ bool train_params_parse(int argc, char ** argv, struct train_params * params) {
params->use_flash = false;
} else if (arg == "--use-flash") {
params->use_flash = true;
} else if (arg == "--no-scratch") {
params->use_scratch = false;
} else if (arg == "--use-scratch") {
params->use_scratch = true;
} else if (arg == "--no-checkpointing") {
params->use_checkpointing = false;
} else if (arg == "--use-checkpointing") {
@@ -2492,10 +2480,6 @@ bool train_params_parse(int argc, char ** argv, struct train_params * params) {
params->use_alloc = false;
} else if (arg == "--use-alloc") {
params->use_alloc = true;
} else if (arg == "--no-unified") {
params->use_unified = false;
} else if (arg == "--use-unified") {
params->use_unified = true;
} else if (arg == "--warmup") {
if (++i >= argc) {
invalid_param = true;
@@ -2936,7 +2920,7 @@ int main(int argc, char ** argv) {
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
struct ggml_cgraph * gb = ggml_new_graph(ctx0);
struct ggml_cgraph * gb_tmp = (params.use_unified || params.use_alloc)
struct ggml_cgraph * gb_tmp = params.use_alloc
? ggml_new_graph(ctx0)
: NULL;