From 44b831dc59f8d6e7b3bbfc4ccd9cc2a121684339 Mon Sep 17 00:00:00 2001
From: mqy
Date: Mon, 19 Jun 2023 13:54:20 +0800
Subject: [PATCH] tune: extract ggml_mulmat_tune_bench_wrapper

---
 ggml-tune.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
 ggml-tune.h |  6 ++++++
 llama.cpp   | 54 +++-------------------------------------------------
 3 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/ggml-tune.c b/ggml-tune.c
index 2e292e98e..36c44e1dc 100644
--- a/ggml-tune.c
+++ b/ggml-tune.c
@@ -935,3 +935,48 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
 
     return true;
 }
+
+bool ggml_mulmat_tune_bench_wrapper(struct ggml_mulmat_tune *mulmat_tune,
+                                    struct ggml_mulmat_tune_params *params,
+                                    bool run_bench) {
+    printf("\n");
+    bool empty_fname = !params->fname || strcmp(params->fname, "") == 0;
+
+    if (!ggml_cpu_has_blas()) {
+        fprintf(stderr, "[tune] this program is not built with BLAS, abort.\n");
+        return false;
+    }
+
+    if (run_bench) {
+        return ggml_mulmat_tune_bench(mulmat_tune, params);
+    }
+
+    if (!empty_fname) {
+        FILE *fp = fopen(params->fname, "r");
+        if (!fp) {
+            fprintf(stderr, "[tune] failed to open file %s.\n", params->fname);
+            return false;
+        } else {
+            int rc = ggml_mulmat_tune_read_data(mulmat_tune, fp);
+            fclose(fp);
+
+            if (rc != 0) {
+                fprintf(stderr,
+                        "[tune] failed to read data from %s, error code: %d\n",
+                        params->fname, rc);
+                return false;
+            }
+
+            fprintf(stderr, "[tune] loaded data from %s\n", params->fname);
+
+            bool ok = ggml_mulmat_tune_validate(mulmat_tune, mulmat_tune->model,
+                                                params->model.ftype,
+                                                params->n_threads);
+            if (!ok) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
diff --git a/ggml-tune.h b/ggml-tune.h
index addcd34db..633f92697 100644
--- a/ggml-tune.h
+++ b/ggml-tune.h
@@ -132,6 +132,12 @@ void ggml_mulmat_tune_estimate_time(const struct ggml_mulmat_tune_shape *shape,
 bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
                             struct ggml_mulmat_tune_params *params);
 
+// This API is intended to be called by llama.cpp and similar integrators.
+// Three modes: bench and run; bench (save to file) then exit; load from file and run.
+bool ggml_mulmat_tune_bench_wrapper(struct ggml_mulmat_tune *mulmat_tune,
+                                    struct ggml_mulmat_tune_params *params,
+                                    bool run_bench);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/llama.cpp b/llama.cpp
index e6bddffd5..a3c3586e3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2748,8 +2748,6 @@ bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune,
                        const char *fname) {
     GGML_ASSERT(ctx->model.n_gpu_layers == 0);
 
-    printf("\n");
-
     const char *model_name = llama_model_type_name(ctx->model.type);
     llama_hparams *hparams = &ctx->model.hparams;
 
@@ -2820,71 +2818,25 @@ bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune,
         /* .m_num          =*/8,
         /* .n_pass         =*/1,
         /* .n_threads      =*/n_threads,
-        /* .prrogress      =*/true,
+        /* .progress       =*/true,
         /* .output_console =*/false,
         /* .fname          =*/fname,
     };
 
-    bool empty_fname = !fname || strcmp(fname, "") == 0;
-
     ctx->tune = new (struct ggml_mulmat_tune);
     if (!ctx->tune) {
         fprintf(stderr, "[tune] failed to allocate memory for tune\n");
         return false;
     }
 
-    if (!ggml_cpu_has_blas()) {
-        fprintf(stderr, "[tune] this program is not built with BLAS, abort.\n");
-        return false;
-    }
-
-    if (tune) {
-        bool ok = ggml_mulmat_tune_bench(ctx->tune, &params);
-        if (!ok) {
-            ggml_mulmat_tune_free(ctx->tune);
-            return false;
-        }
-        if (!empty_fname) {
-            ggml_mulmat_tune_free(ctx->tune);
-            return true;
-        }
-    } else if (empty_fname) {
-        return false;
-    }
-
-    if (!empty_fname) {
-        FILE *fp = fopen(fname, "r");
-        if (!fp) {
-            fprintf(stderr, "[tune] failed to open file %s.\n", fname);
-            return false;
-        } else {
-            int rc = ggml_mulmat_tune_read_data(ctx->tune, fp);
-            fclose(fp);
-
-            if (rc != 0) {
-                fprintf(stderr,
-                        "[tune] failed to read data from %s, error code: %d\n",
-                        fname, rc);
-                return false;
-            }
-
-            fprintf(stderr, "[tune] loaded data from %s\n", fname);
-
-            bool ok = ggml_mulmat_tune_validate(ctx->tune, model_name, ggml_ftype,
-                                                params.n_threads);
-            if (!ok) {
-                return false;
-            }
-        }
-    }
-
-    return true;
+    return ggml_mulmat_tune_bench_wrapper(ctx->tune, &params, tune);
 }
 #endif
 
 void llama_free(struct llama_context * ctx) {
 #ifdef GGML_USE_TUNE
     if (ctx->tune) {
+        ggml_mulmat_tune_free(ctx->tune);
         delete(ctx->tune);
     }
 #endif
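
Usage sketch (editorial note, not part of the diff): a minimal sketch of how a caller other than llama_mulmat_tune() might drive the new wrapper. It assumes only what this patch shows: the params fields n_threads and fname, the wrapper's three modes, and ggml_mulmat_tune_free() for cleanup. The helper name tune_example() and the remaining params setup are hypothetical.

    // Hypothetical caller of ggml_mulmat_tune_bench_wrapper (illustrative
    // only; not part of this patch). The three modes map to arguments as:
    //   bench and run:          run_bench = true,  fname = NULL or ""
    //   bench (save) then exit: run_bench = true,  fname set
    //   load and run:           run_bench = false, fname set
    #include <stdbool.h>
    #include <string.h>

    #include "ggml-tune.h"

    static bool tune_example(const char *fname, int n_threads, bool run_bench) {
        struct ggml_mulmat_tune tune;
        memset(&tune, 0, sizeof(tune));

        struct ggml_mulmat_tune_params params;
        memset(&params, 0, sizeof(params));
        // Fill params.model, params.m_num, params.n_pass, params.progress,
        // params.output_console, etc. as llama_mulmat_tune() does; only the
        // fields the wrapper reads directly are set here.
        params.n_threads = n_threads;
        params.fname     = fname;

        bool ok = ggml_mulmat_tune_bench_wrapper(&tune, &params, run_bench);

        // The caller owns the tune object, mirroring what llama_free() now
        // does with ctx->tune.
        ggml_mulmat_tune_free(&tune);
        return ok;
    }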