diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 115d8fb1b..7c991c8d5 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -52,13 +52,6 @@ int main(int argc, char ** argv) { return 1; } - // needed to initialize f16 tables - { - struct ggml_init_params params = { 0, NULL, false }; - struct ggml_context * ctx = ggml_init(params); - ggml_free(ctx); - } - // parse command line arguments const std::string fname_inp = argv[1]; std::string fname_out; diff --git a/llama.cpp b/llama.cpp index 1f9d37844..7b599c173 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2198,6 +2198,12 @@ int llama_model_quantize( enum llama_ftype ftype, int nthread) { try { + // needed to initialize f16 tables + { + struct ggml_init_params params = { 0, NULL, false }; + struct ggml_context * ctx = ggml_init(params); + ggml_free(ctx); + } llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread); return 0; } catch (const std::string & err) {