From 44b831dc59f8d6e7b3bbfc4ccd9cc2a121684339 Mon Sep 17 00:00:00 2001
From: mqy
Date: Mon, 19 Jun 2023 13:54:20 +0800
Subject: [PATCH] tune: extract ggml_mulmat_tune_bench_wrapper

---
 ggml-tune.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
 ggml-tune.h |  6 ++++++
 llama.cpp   | 54 +++-------------------------------------------------
 3 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/ggml-tune.c b/ggml-tune.c
index 2e292e98e..36c44e1dc 100644
--- a/ggml-tune.c
+++ b/ggml-tune.c
@@ -935,3 +935,48 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
 
     return true;
 }
+
+bool ggml_mulmat_tune_bench_wrapper(struct ggml_mulmat_tune *mulmat_tune,
+                                    struct ggml_mulmat_tune_params *params,
+                                    bool run_bench) {
+    printf("\n");
+    bool empty_fname = !params->fname || strcmp(params->fname, "") == 0;
+
+    if (!ggml_cpu_has_blas()) {
+        fprintf(stderr, "[tune] this program is not built with BLAS, abort.\n");
+        return false;
+    }
+
+    if (run_bench) {
+        return ggml_mulmat_tune_bench(mulmat_tune, params);
+    }
+
+    if (!empty_fname) {
+        FILE *fp = fopen(params->fname, "r");
+        if (!fp) {
+            fprintf(stderr, "[tune] failed to open file %s.\n", params->fname);
+            return false;
+        } else {
+            int rc = ggml_mulmat_tune_read_data(mulmat_tune, fp);
+            fclose(fp);
+
+            if (rc != 0) {
+                fprintf(stderr,
+                        "[tune] failed to read data from %s, error code: %d\n",
+                        params->fname, rc);
+                return false;
+            }
+
+            fprintf(stderr, "[tune] loaded data from %s\n", params->fname);
+
+            bool ok = ggml_mulmat_tune_validate(mulmat_tune, mulmat_tune->model,
+                                                params->model.ftype,
+                                                params->n_threads);
+            if (!ok) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
diff --git a/ggml-tune.h b/ggml-tune.h
index addcd34db..633f92697 100644
--- a/ggml-tune.h
+++ b/ggml-tune.h
@@ -132,6 +132,12 @@ void ggml_mulmat_tune_estimate_time(const struct ggml_mulmat_tune_shape *shape,
 bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
                             struct ggml_mulmat_tune_params *params);
 
+// This API is intended to be called by llama.cpp and similar integrators.
+// Three modes: bench and run; bench (save to file) then exit; load from file and run.
+bool ggml_mulmat_tune_bench_wrapper(struct ggml_mulmat_tune *mulmat_tune,
+                                    struct ggml_mulmat_tune_params *params,
+                                    bool run_bench);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/llama.cpp b/llama.cpp
index e6bddffd5..a3c3586e3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2748,8 +2748,6 @@ bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune,
                        const char *fname) {
     GGML_ASSERT(ctx->model.n_gpu_layers == 0);
 
-    printf("\n");
-
     const char *model_name = llama_model_type_name(ctx->model.type);
     llama_hparams *hparams = &ctx->model.hparams;
 
@@ -2820,71 +2818,25 @@ bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune,
         /* .m_num          =*/8,
         /* .n_pass         =*/1,
         /* .n_threads      =*/n_threads,
-        /* .prrogress      =*/true,
+        /* .progress       =*/true,
         /* .output_console =*/false,
         /* .fname          =*/fname,
     };
 
-    bool empty_fname = !fname || strcmp(fname, "") == 0;
-
     ctx->tune = new (struct ggml_mulmat_tune);
     if (!ctx->tune) {
         fprintf(stderr, "[tune] failed to allocate memory for tune\n");
         return false;
     }
 
-    if (!ggml_cpu_has_blas()) {
-        fprintf(stderr, "[tune] this program is not built with BLAS, abort.\n");
-        return false;
-    }
-
-    if (tune) {
-        bool ok = ggml_mulmat_tune_bench(ctx->tune, &params);
-        if (!ok) {
-            ggml_mulmat_tune_free(ctx->tune);
-            return false;
-        }
-        if (!empty_fname) {
-            ggml_mulmat_tune_free(ctx->tune);
-            return true;
-        }
-    } else if (empty_fname) {
-        return false;
-    }
-
-    if (!empty_fname) {
-        FILE *fp = fopen(fname, "r");
-        if (!fp) {
-            fprintf(stderr, "[tune] failed to open file %s.\n", fname);
-            return false;
-        } else {
-            int rc = ggml_mulmat_tune_read_data(ctx->tune, fp);
-            fclose(fp);
-
-            if (rc != 0) {
-                fprintf(stderr,
-                        "[tune] failed to read data from %s, error code: %d\n",
-                        fname, rc);
-                return false;
-            }
-
-            fprintf(stderr, "[tune] loaded data from %s\n", fname);
-
-            bool ok = ggml_mulmat_tune_validate(ctx->tune, model_name, ggml_ftype,
-                                                params.n_threads);
-            if (!ok) {
-                return false;
-            }
-        }
-    }
-
-    return true;
+    return ggml_mulmat_tune_bench_wrapper(ctx->tune, &params, tune);
 }
 #endif
 
 void llama_free(struct llama_context * ctx) {
 #ifdef GGML_USE_TUNE
     if (ctx->tune) {
+        ggml_mulmat_tune_free(ctx->tune);
         delete(ctx->tune);
     }
 #endif
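
Usage sketch (editorial note, not part of the diff): a minimal sketch of how a caller other than llama_mulmat_tune() might drive the new wrapper. It assumes only what this patch shows: the params fields n_threads and fname, the wrapper's three modes, and ggml_mulmat_tune_free() for cleanup. The helper name tune_example() and the remaining params setup are hypothetical.

    // Hypothetical caller of ggml_mulmat_tune_bench_wrapper (illustrative
    // only; not part of this patch). The three modes map to arguments as:
    //   bench and run:          run_bench = true,  fname = NULL or ""
    //   bench (save) then exit: run_bench = true,  fname set
    //   load and run:           run_bench = false, fname set
    #include <stdbool.h>
    #include <string.h>

    #include "ggml-tune.h"

    static bool tune_example(const char *fname, int n_threads, bool run_bench) {
        struct ggml_mulmat_tune tune;
        memset(&tune, 0, sizeof(tune));

        struct ggml_mulmat_tune_params params;
        memset(&params, 0, sizeof(params));
        // Fill params.model, params.m_num, params.n_pass, params.progress,
        // params.output_console, etc. as llama_mulmat_tune() does; only the
        // fields the wrapper reads directly are set here.
        params.n_threads = n_threads;
        params.fname     = fname;

        bool ok = ggml_mulmat_tune_bench_wrapper(&tune, &params, run_bench);

        // The caller owns the tune object, mirroring what llama_free() now
        // does with ctx->tune.
        ggml_mulmat_tune_free(&tune);
        return ok;
    }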