Add llama_init_ggml C API.

2023-05-20 11:42:28 +08:00 · 2023-05-20 11:42:28 +08:00 · e78a971859
commit e78a971859
parent 484f6e9438
3 changed files with 16 additions and 6 deletions
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@ -52,6 +52,13 @@ int main(int argc, char ** argv) {
        return 1;
    }

+    // needed to initialize f16 tables
+    {
+        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_context * ctx = ggml_init(params);
+        ggml_free(ctx);
+    }
+
    // parse command line arguments
    const std::string fname_inp = argv[1];
    std::string fname_out;
--- a/llama.cpp
+++ b/llama.cpp
@ -2188,6 +2188,11 @@ struct llama_context * llama_init_from_file(
    return ctx;
 }

+void llama_init_ggml(struct ggml_init_params params) { // create-and-discard helper: nothing is returned to the caller
+    struct ggml_context * ctx = ggml_init(params); // called for its setup side effect (f16 tables, per the quantize example's comment)
+    ggml_free(ctx); // the context itself is not needed, so release it right away
+}
+
 void llama_free(struct llama_context * ctx) {
    delete ctx;
 }
@ -2198,12 +2203,6 @@ int llama_model_quantize(
  enum llama_ftype   ftype,
        int          nthread) {
    try {
-        // needed to initialize f16 tables
-        {
-            struct ggml_init_params params = { 0, NULL, false };
-            struct ggml_context * ctx = ggml_init(params);
-            ggml_free(ctx);
-        }
        llama_model_quantize_internal(fname_inp, fname_out, ftype, nthread);
        return 0;
    } catch (const std::string & err) {
--- a/llama.h
+++ b/llama.h
@ -97,6 +97,10 @@ extern "C" {
                             const char * path_model,
            struct llama_context_params   params);

+    // Initializes ggml by creating a context from params and freeing it right away,
+    // so no context pointer is returned (the quantize example uses this to set up the f16 tables).
+    LLAMA_API void llama_init_ggml(struct ggml_init_params params);
+
    // Frees all allocated memory
    LLAMA_API void llama_free(struct llama_context * ctx);