metal : add comments
This commit is contained in:
parent
d8a7486d17
commit
b252acbcb6
4 changed files with 19 additions and 7 deletions
|
@ -1,3 +1,18 @@
|
|||
// Evaluate a statically exported ggml computation graph with Metal
|
||||
//
|
||||
// - First, export a LLaMA graph:
|
||||
//
|
||||
// $ ./bin/main -m ../models/7B/ggml-model-q4_0.bin --export
|
||||
//
|
||||
// - Run this tool to evaluate the exported graph:
|
||||
//
|
||||
// $ ./bin/metal llama.ggml
|
||||
//
|
||||
// This tool is intended mostly for debugging and demonstration purposes.
|
||||
// The main limitation of exporting computation graphs is that their sizes are static, which can
|
||||
// often be a problem for real-world applications.
|
||||
//
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-metal.h"
|
||||
|
||||
|
|
1
ggml.c
1
ggml.c
|
@ -14869,7 +14869,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|||
// read file into data
|
||||
{
|
||||
FILE * fin = fopen(fname, "rb");
|
||||
|
||||
if (!fin) {
|
||||
fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
|
||||
return result;
|
||||
|
|
|
@ -2990,10 +2990,6 @@ int llama_eval(
|
|||
}
|
||||
|
||||
int llama_eval_export(struct llama_context * ctx, const char * fname) {
|
||||
// these values determine the maximum inference sizes of the exported computation graph
|
||||
// TODO: need to increase buffers to support the full context
|
||||
//const int n_ctx = ctx->model.hparams.n_ctx;
|
||||
//const int n_batch = 512;
|
||||
const int n_batch = 1;
|
||||
const int n_ctx = 512 - n_batch;
|
||||
|
||||
|
|
6
llama.h
6
llama.h
|
@ -173,8 +173,10 @@ extern "C" {
|
|||
int n_past,
|
||||
int n_threads);
|
||||
|
||||
// Export a computation graph for model inference
|
||||
// TODO: very likely to change
|
||||
// Export a static computation graph for a context size of 511 and a batch size of 1
|
||||
// NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
|
||||
// parameters here to keep things simple
|
||||
// IMPORTANT: do not use for anything other than debugging and testing!
|
||||
LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);
|
||||
|
||||
// Convert the provided text into tokens.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue