From 59196290f8f2db28ee34529dcc6827966ed30564 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 24 Aug 2023 20:59:10 +0300
Subject: [PATCH] metal : fix encoders memory leak

---
 ggml-metal.m | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index d38534055..a362a360a 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -521,13 +521,16 @@ void ggml_metal_graph_compute(
 
     const int n_cb = ctx->n_cb;
 
-    NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity:n_cb];
+    NSMutableArray * command_buffers  = [NSMutableArray arrayWithCapacity:n_cb];
+    NSMutableArray * command_encoders = [NSMutableArray arrayWithCapacity:n_cb];
 
     for (int i = 0; i < n_cb; ++i) {
         command_buffers[i] = [ctx->queue commandBuffer];
 
         // enqueue the command buffers in order to specify their execution order
         [command_buffers[i] enqueue];
+
+        command_encoders[i] = [command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
     }
 
     // TODO: is this the best way to start threads?
@@ -541,9 +544,8 @@ void ggml_metal_graph_compute(
             size_t offs_src1 = 0;
             size_t offs_dst = 0;
 
-            id command_buffer = command_buffers[cb_idx];
-
-            id encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
+            id command_buffer = command_buffers[cb_idx];
+            id encoder        = command_encoders[cb_idx];
 
             const int node_start = (cb_idx + 0) * n_nodes_per_cb;
             const int node_end = MIN((cb_idx == n_cb - 1) ? n_nodes : (cb_idx + 1) * n_nodes_per_cb, n_nodes);
@@ -1133,8 +1135,10 @@ void ggml_metal_graph_compute(
             GGML_ASSERT(false);
         }
 
+        [command_encoders[i] release];
         [command_buffers[i] release];
     }
 
+    [command_encoders release];
     [command_buffers release];
 }