mtl : export just a small part of the graph for now to make it easier

Author: Georgi Gerganov
Date:   2023-05-29 21:40:05 +03:00
Parent: a792cbd0fc
Commit: 897d6d8e8f
2 changed files with 23 additions and 9 deletions


@@ -117,15 +117,15 @@ struct ggml_mtl_context * llama_mtl_init(
         ctx->function_add = [ctx->library newFunctionWithName:@"kernel_add"];
         ctx->pipeline_add = [ctx->device newComputePipelineStateWithFunction:ctx->function_add error:nil];
-        fprintf(stderr, "%s: loaded kernel_add: %p\n", __func__, ctx->pipeline_add);
+        fprintf(stderr, "%s: loaded kernel_add: %p\n", __func__, (void *) ctx->pipeline_add);

         ctx->function_relu = [ctx->library newFunctionWithName:@"kernel_relu"];
         ctx->pipeline_relu = [ctx->device newComputePipelineStateWithFunction:ctx->function_relu error:nil];
-        fprintf(stderr, "%s: loaded kernel_relu: %p\n", __func__, ctx->pipeline_relu);
+        fprintf(stderr, "%s: loaded kernel_relu: %p\n", __func__, (void *) ctx->pipeline_relu);

         ctx->function_soft_max = [ctx->library newFunctionWithName:@"kernel_soft_max" constantValues:constants error:nil];
         ctx->pipeline_soft_max = [ctx->device newComputePipelineStateWithFunction:ctx->function_soft_max error:nil];
-        fprintf(stderr, "%s: loaded kernel_soft_max: %p\n", __func__, ctx->pipeline_soft_max);
+        fprintf(stderr, "%s: loaded kernel_soft_max: %p\n", __func__, (void *) ctx->pipeline_soft_max);
    }

    // MTLBuffer approach
@@ -217,11 +217,11 @@ int llama_mtl_eval(
    // copy the input data to the GPU
    {
-        struct ggml_tensor * inp = ggml_graph_get_tensor(gf, "input");
-        id<MTLBuffer> id_dst = llama_mtl_get_buffer(ctx, inp, &offs_src0);
-        memcpy(id_dst.contents + offs_src0, inp->data, ggml_nbytes(inp));
+        struct ggml_tensor * embd = ggml_graph_get_tensor(gf, "embd");
+        id<MTLBuffer> id_dst = llama_mtl_get_buffer(ctx, embd, &offs_src0);
+        memcpy((char *) id_dst.contents + offs_src0, embd->data, ggml_nbytes(embd));
    }

    for (int i = 0; i < gf->n_nodes; ++i) {
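
Aside on the new cast: the contents property of an MTLBuffer is a void *, and pointer arithmetic on void * is a GNU extension rather than standard C/C++, so the added (char *) cast makes the byte offset well-defined. A minimal C sketch of the same byte-offset pattern; the buffer size, offset, and payload here are illustrative:

    #include <stdlib.h>
    #include <string.h>

    int main(void) {
        void * base = malloc(1024);           // stand-in for id_dst.contents (a void *)
        const float src[4] = {1, 2, 3, 4};    // stand-in for the tensor data
        size_t offs = 128;                    // byte offset of the tensor within the buffer

        // arithmetic on void * is non-portable; cast to char * for a byte-wise offset
        memcpy((char *) base + offs, src, sizeof(src));

        free(base);
        return 0;
    }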


@@ -1243,6 +1243,10 @@ static bool llama_eval_internal(
    ggml_cgraph gf = {};
    gf.n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;

+    // TODO: TMP !!!
+    ggml_cgraph gf_export = {};
+    gf_export.n_threads = 1;
+
    struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
    ggml_set_name(embd, "embd");
    memcpy(embd->data, tokens, N*ggml_element_size(embd));
@@ -1264,6 +1268,11 @@ static bool llama_eval_internal(
            cur = ggml_mul(ctx0, cur, model.layers[il].attention_norm);
        }

+        // TODO: TMP !!!!
+        if (il == 0) {
+            ggml_set_name(cur, "mtl-check");
+        }
+
        // self-attention
        {
            // compute Q and K and RoPE them
@@ -1420,12 +1429,17 @@ static bool llama_eval_internal(
    // logits -> probs
    //inpL = ggml_soft_max_inplace(ctx0, inpL);

+    // TODO: TMP !!!!!!!!!!!!!!!!!!!!
    // run the computation
-    ggml_build_forward_expand(&gf, inpL);
-    ggml_graph_compute       (ctx0, &gf);
+    //ggml_build_forward_expand(&gf, inpL);
+    //ggml_graph_compute       (ctx0, &gf);
+
+    // lets export a smaller graph to get things rolling -- baby steps first
+    ggml_build_forward_expand(&gf_export, ggml_get_tensor(ctx0, "mtl-check"));

    if (cgraph_fname) {
-        ggml_graph_export(&gf, cgraph_fname);
+        //ggml_graph_export(&gf, cgraph_fname);
+        ggml_graph_export(&gf_export, cgraph_fname);
    }

 #ifdef GGML_PERF
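
Taken together, the llama.cpp side of the change boils down to: tag an intermediate tensor by name, build a forward graph that ends at that tensor, and export only that subgraph. A minimal standalone C sketch of the pattern, assuming the ggml API of this period (ggml_set_name, ggml_get_tensor, ggml_build_forward_expand, ggml_graph_export); the toy graph, tensor shapes, and output filename are illustrative:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx0 = ggml_init(params);

        // build a toy graph: out = relu(a*b) + b
        struct ggml_tensor * a   = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 8);
        struct ggml_tensor * b   = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 8);
        struct ggml_tensor * cur = ggml_mul(ctx0, a, b);

        // tag the intermediate result we want to inspect on the GPU side
        ggml_set_name(cur, "mtl-check");

        // the rest of the graph exists in the context but will not be exported
        struct ggml_tensor * out = ggml_add(ctx0, ggml_relu(ctx0, cur), b);
        (void) out;

        // build a forward graph that stops at the named tensor and export it
        struct ggml_cgraph gf_export = {0};
        gf_export.n_threads = 1;
        ggml_build_forward_expand(&gf_export, ggml_get_tensor(ctx0, "mtl-check"));
        ggml_graph_export(&gf_export, "mtl-check.ggml");

        ggml_free(ctx0);
        return 0;
    }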