From 484f6e94381c0f88b9162abc9fe9ef7a49e387a9 Mon Sep 17 00:00:00 2001
From: Yaohui Liu
Date: Sat, 20 May 2023 00:32:08 +0800
Subject: [PATCH] llama: initialize f16 tables in quantize c api.

---
 examples/quantize/quantize.cpp | 7 -------
 llama.cpp                      | 6 ++++++
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 115d8fb1b..7c991c8d5 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -52,13 +52,6 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    // needed to initialize f16 tables
-    {
-        struct ggml_init_params params = { 0, NULL, false };
-        struct ggml_context * ctx = ggml_init(params);
-        ggml_free(ctx);
-    }
-
     // parse command line arguments
     const std::string fname_inp = argv[1];
     std::string fname_out;
diff --git a/llama.cpp b/llama.cpp
index 1f9d37844..7b599c173 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2198,6 +2198,12 @@ int llama_model_quantize(
         enum llama_ftype ftype,
         int nthread) {
     try {
+        // needed to initialize f16 tables
+        {
+            struct ggml_init_params params = { 0, NULL, false };
+            struct ggml_context * ctx = ggml_init(params);
+            ggml_free(ctx);
+        }
         llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
         return 0;
     } catch (const std::string & err) {