From ef43a6228966b7ee76cdbe600934709223e56184 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 15 Jun 2023 20:26:56 +0300
Subject: [PATCH] metal : determine number of command buffers based on
 gf->n_threads

---
 ggml-metal.h | 1 +
 ggml-metal.m | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/ggml-metal.h b/ggml-metal.h
index a9441a9d4..033c4d86a 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -55,6 +55,7 @@ void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
 void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
 
 // same as ggml_graph_compute but uses Metal
+// creates gf->n_threads command buffers and encodes the graph into them in parallel
 void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
 
 #ifdef __cplusplus
diff --git a/ggml-metal.m b/ggml-metal.m
index 372575772..0e9b56aa3 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -284,13 +284,13 @@ void ggml_metal_get_tensor(
 
 void ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
-             struct ggml_cgraph * gf) {
+               struct ggml_cgraph * gf) {
     metal_printf("%s: evaluating graph\n", __func__);
 
     // create multiple command buffers and enqueue them
     // then, we encode the graph into the command buffers in parallel
 
-    const int n_cb = 8;
+    const int n_cb = gf->n_threads;
 
     NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity:n_cb];