metal : add comments

commit b252acbcb6
parent d8a7486d17
Author: Georgi Gerganov
Date:   2023-06-04 18:10:28 +03:00
4 changed files with 19 additions and 7 deletions

examples/metal/metal.cpp

@@ -1,3 +1,18 @@
+// Evaluate a statically exported ggml computation graph with Metal
+//
+// - First, export a LLaMA graph:
+//
+//  $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
+//
+// - Run this tool to evaluate the exported graph:
+//
+//  $ ./bin/metal llama.ggml
+//
+// The purpose of this tool is mostly for debugging and demonstration purposes.
+// The main limitation of exporting computation graphs is that their sizes are static which often
+// can be a problem for real-world applications.
+//
 #include "ggml.h"
 #include "ggml-metal.h"

ggml.c

@@ -14869,7 +14869,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
     // read file into data
     {
         FILE * fin = fopen(fname, "rb");
-
         if (!fin) {
             fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
             return result;

llama.cpp

@@ -2990,10 +2990,6 @@ int llama_eval(
 }

 int llama_eval_export(struct llama_context * ctx, const char * fname) {
-    // these values determine the maximum inference sizes of the exported computation graph
-    // TODO: need to increase buffers to support the full context
-    //const int n_ctx   = ctx->model.hparams.n_ctx;
-    //const int n_batch = 512;
     const int n_batch = 1;
     const int n_ctx   = 512 - n_batch;

llama.h

@@ -173,8 +173,10 @@ extern "C" {
     int n_past,
     int n_threads);

-    // Export a computation graph for model inference
-    // TODO: very likely to change
+    // Export a static computation graph for context of 511 and batch size of 1
+    // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
+    //       parameters here to keep things simple
+    // IMPORTANT: do not use for anything else other than debugging and testing!
     LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);

     // Convert the provided text into tokens.
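
For reference, producing llama.ggml through this API looks roughly like the sketch below. A minimal sketch, assuming the llama.h API of this era (llama_context_default_params, llama_init_from_file, llama_free), which was reworked in later versions; the model path is only an example.

// Minimal sketch: load a model and export its static computation graph.
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    // load the model (example path, matching the comment in metal.cpp)
    struct llama_context * ctx = llama_init_from_file("../models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        return 1;
    }

    // write a static graph (n_batch = 1, n_ctx = 511) to llama.ggml
    const int ret = llama_eval_export(ctx, "llama.ggml");

    llama_free(ctx);

    return ret;
}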