From 2d6733a8c9a73831340a634b204cd803e3aa26e2 Mon Sep 17 00:00:00 2001
From: RogerD <29764398+RDearnaley@users.noreply.github.com>
Date: Sat, 9 Sep 2023 21:15:25 -0700
Subject: [PATCH] Check Metal MTLCommandBufferStatus error codes and report an
 out of memory error if one occurred

---
 ggml-metal.m | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index d0d23442e..232793499 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1217,7 +1217,22 @@ void ggml_metal_graph_compute(
 
         MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
         if (status != MTLCommandBufferStatusCompleted) {
-            metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            if (status == MTLCommandBufferStatusError) {
+                // the command buffer hit an error - inspect its NSError to find out why
+                NSError * error = [ctx->command_buffers[i] error];
+                const int mtl_error_code = (int) [error code];
+                if ([[error domain] isEqualToString:MTLCommandBufferErrorDomain] && mtl_error_code == MTLCommandBufferErrorOutOfMemory) {
+                    metal_printf("%s: command buffer %d failed with status MTLCommandBufferStatus.error (5) and error code \
+MTLCommandBufferError.outOfMemory (8)\n", __func__, i);
+                    printf("Metal ran out of memory. Try a smaller context size or a smaller (more coarsely quantized) model, \
+preferably one under the recommended max working set size, or else fall back to running on CPU only.\n");
+                } else {
+                    metal_printf("%s: command buffer %d failed with status MTLCommandBufferStatus.error (5) and error code %d\n",
+                        __func__, i, mtl_error_code);
+                }
+            } else {
+                metal_printf("%s: command buffer %d failed with status %lu\n", __func__, i, status);
+            }
             GGML_ASSERT(false);
         }
     }