From ef43a6228966b7ee76cdbe600934709223e56184 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 15 Jun 2023 20:26:56 +0300 Subject: [PATCH] metal : determine number of command buffers based on gf->n_threads --- ggml-metal.h | 1 + ggml-metal.m | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml-metal.h b/ggml-metal.h index a9441a9d4..033c4d86a 100644 --- a/ggml-metal.h +++ b/ggml-metal.h @@ -55,6 +55,7 @@ void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t); // same as ggml_graph_compute but uses Metal +// creates gf->n_threads command buffers and encodes the graph into them in parallel void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf); #ifdef __cplusplus diff --git a/ggml-metal.m b/ggml-metal.m index 372575772..0e9b56aa3 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -284,13 +284,13 @@ void ggml_metal_get_tensor( void ggml_metal_graph_compute( struct ggml_metal_context * ctx, - struct ggml_cgraph * gf) { + struct ggml_cgraph * gf) { metal_printf("%s: evaluating graph\n", __func__); // create multiple command buffers and enqueue them // then, we encode the graph into the command buffers in parallel - const int n_cb = 8; + const int n_cb = gf->n_threads; NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity:n_cb];