metal : add comments

2023-06-04 18:10:28 +03:00 · 2023-06-04 18:10:28 +03:00 · b252acbcb6
commit b252acbcb6
parent d8a7486d17
4 changed files with 19 additions and 7 deletions
--- a/examples/metal/metal.cpp
+++ b/examples/metal/metal.cpp
@ -1,3 +1,18 @@
+// Evaluate a statically exported ggml computation graph with Metal
+//
+// - First, export a LLaMA graph:
+//
+//  $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
+//
+// - Run this tool to evaluate the exported graph:
+//
+//  $ ./bin/metal llama.ggml
+//
+// This tool is intended mostly for debugging and demonstration purposes.
+// The main limitation of exporting computation graphs is that their sizes are static, which can
+// often be a problem for real-world applications.
+//
+
 #include "ggml.h"
 #include "ggml-metal.h"

--- a/ggml.c
+++ b/ggml.c
@ -14869,7 +14869,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
    // read file into data
    {
        FILE * fin = fopen(fname, "rb");
-
        if (!fin) {
            fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
            return result;
--- a/llama.cpp
+++ b/llama.cpp
@ -2990,10 +2990,6 @@ int llama_eval(
 }

 int llama_eval_export(struct llama_context * ctx, const char * fname) {
-    // these values determine the maximum inference sizes of the exported computation graph
-    // TODO: need to increase buffers to support the full context
-    //const int n_ctx   = ctx->model.hparams.n_ctx;
-    //const int n_batch = 512;
    const int n_batch = 1;
    const int n_ctx   = 512 - n_batch;

--- a/llama.h
+++ b/llama.h
@ -173,8 +173,10 @@ extern "C" {
                             int   n_past,
                             int   n_threads);

-    // Export a computation graph for model inference
-    // TODO: very likely to change
+    // Export a static computation graph for a context size of 511 and a batch size of 1
+    // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
+    //       parameters here to keep things simple
+    // IMPORTANT: do not use for anything other than debugging and testing!
    LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);

    // Convert the provided text into tokens.