diff --git a/examples/metal/metal.cpp b/examples/metal/metal.cpp
index fc1db90a1..10b35faf8 100644
--- a/examples/metal/metal.cpp
+++ b/examples/metal/metal.cpp
@@ -1,3 +1,18 @@
+// Evaluate a statically exported ggml computation graph with Metal
+//
+// - First, export a LLaMA graph:
+//
+//   $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
+//
+// - Run this tool to evaluate the exported graph:
+//
+//   $ ./bin/metal llama.ggml
+//
+// This tool is intended mostly for debugging and demonstration purposes.
+// The main limitation of exporting computation graphs is that their sizes are static, which often
+// can be a problem for real-world applications.
+//
+
 #include "ggml.h"
 #include "ggml-metal.h"
 
diff --git a/ggml.c b/ggml.c
index b5e6997dd..27a9de2be 100644
--- a/ggml.c
+++ b/ggml.c
@@ -14869,7 +14869,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
     // read file into data
     {
         FILE * fin = fopen(fname, "rb");
-
         if (!fin) {
             fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
             return result;
diff --git a/llama.cpp b/llama.cpp
index 4b22b215a..471b996aa 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2990,10 +2990,6 @@ int llama_eval(
 }
 
 int llama_eval_export(struct llama_context * ctx, const char * fname) {
-    // these values determine the maximum inference sizes of the exported computation graph
-    // TODO: need to increase buffers to support the full context
-    //const int n_ctx   = ctx->model.hparams.n_ctx;
-    //const int n_batch = 512;
     const int n_batch = 1;
     const int n_ctx   = 512 - n_batch;
 
diff --git a/llama.h b/llama.h
index a650ddf45..87fa97367 100644
--- a/llama.h
+++ b/llama.h
@@ -173,8 +173,10 @@ extern "C" {
             int   n_past,
             int   n_threads);
 
-    // Export a computation graph for model inference
-    // TODO: very likely to change
+    // Export a static computation graph for a context of 511 tokens and a batch size of 1
+    // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
+    //       parameters here to keep things simple
+    // IMPORTANT: do not use for anything other than debugging and testing!
     LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);
 
     // Convert the provided text into tokens.
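
For reference, the export step can also be driven programmatically instead of via ./bin/main --export. The sketch below is a minimal, hypothetical example assuming the llama.h API at this revision (llama_context_default_params, llama_init_from_file, llama_free, and the llama_eval_export declared above); the model path and output filename are only illustrative.

// sketch: load a model and export its static computation graph to llama.ggml
// (assumes the llama.h API at this revision; not an official tool)
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    // example path - pass your own model as the first argument
    const char * model_path = argc > 1 ? argv[1] : "../models/7B/ggml-model-q4_0.bin";

    llama_context_params params = llama_context_default_params();

    llama_context * ctx = llama_init_from_file(model_path, params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model from %s\n", model_path);
        return 1;
    }

    // writes a static graph (n_ctx = 511, n_batch = 1) to llama.ggml,
    // which ./bin/metal can then evaluate
    if (llama_eval_export(ctx, "llama.ggml") != 0) {
        fprintf(stderr, "failed to export graph\n");
        llama_free(ctx);
        return 1;
    }

    llama_free(ctx);
    return 0;
}

The resulting llama.ggml can then be evaluated with ./bin/metal llama.ggml, as described in the header comment added to examples/metal/metal.cpp above.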