metal : add comments

commit b252acbcb6
parent d8a7486d17
Author: Georgi Gerganov
Date:   2023-06-04 18:10:28 +03:00
4 changed files with 19 additions and 7 deletions

examples/metal/metal.cpp

@@ -1,3 +1,18 @@
+// Evaluate a statically exported ggml computation graph with Metal
+//
+// - First, export a LLaMA graph:
+//
+//  $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
+//
+// - Run this tool to evaluate the exported graph:
+//
+//  $ ./bin/metal llama.ggml
+//
+// The purpose of this tool is mostly for debugging and demonstration purposes.
+// The main limitation of exporting computation graphs is that their sizes are static which often
+// can be a problem for real-world applications.
+//
 #include "ggml.h"
 #include "ggml-metal.h"

ggml.c

@@ -14869,7 +14869,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
     // read file into data
     {
         FILE * fin = fopen(fname, "rb");
-
         if (!fin) {
             fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
             return result;

llama.cpp

@@ -2990,10 +2990,6 @@ int llama_eval(
 }

 int llama_eval_export(struct llama_context * ctx, const char * fname) {
-    // these values determine the maximum inference sizes of the exported computation graph
-    // TODO: need to increase buffers to support the full context
-    //const int n_ctx   = ctx->model.hparams.n_ctx;
-    //const int n_batch = 512;
     const int n_batch = 1;
     const int n_ctx   = 512 - n_batch;

llama.h

@@ -173,8 +173,10 @@ extern "C" {
     int n_past,
     int n_threads);

-    // Export a computation graph for model inference
-    // TODO: very likely to change
+    // Export a static computation graph for context of 511 and batch size of 1
+    // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
+    //       parameters here to keep things simple
+    // IMPORTANT: do not use for anything else other than debugging and testing!
     LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);

     // Convert the provided text into tokens.
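
For reference, producing llama.ggml through this API looks roughly like the sketch below. A minimal sketch, assuming the llama.h API of this era (llama_context_default_params, llama_init_from_file, llama_free), which was reworked in later versions; the model path is only an example.

// Minimal sketch: load a model and export its static computation graph.
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    // load the model (example path, matching the comment in metal.cpp)
    struct llama_context * ctx = llama_init_from_file("../models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        return 1;
    }

    // write a static graph (n_batch = 1, n_ctx = 511) to llama.ggml
    const int ret = llama_eval_export(ctx, "llama.ggml");

    llama_free(ctx);

    return ret;
}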