From 90fd43c7eb3d6d69326c26c02fea5cd0c6859a6e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 6 Jan 2024 15:04:27 +0200 Subject: [PATCH] metal : fix check for simdgroup reduction support --- ggml-metal.m | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/ggml-metal.m b/ggml-metal.m index 3e0f47575..bf3f626ab 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -325,27 +325,37 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { // https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf { - bool found = false; - - for (int i = MTLGPUFamilyApple1 + 20; !found && i >= MTLGPUFamilyApple1; --i) { + for (int i = MTLGPUFamilyApple1 + 20; i >= MTLGPUFamilyApple1; --i) { if ([ctx->device supportsFamily:i]) { GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyApple%d (%d)\n", __func__, i - (int) MTLGPUFamilyApple1 + 1, i); - found = true; break; } } - for (int i = MTLGPUFamilyCommon1 + 5; !found && i >= MTLGPUFamilyCommon1; --i) { + for (int i = MTLGPUFamilyCommon1 + 5; i >= MTLGPUFamilyCommon1; --i) { if ([ctx->device supportsFamily:i]) { GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyCommon%d (%d)\n", __func__, i - (int) MTLGPUFamilyCommon1 + 1, i); - found = true; break; } } + + if (@available(macOS 13.0, iOS 16.0, *)) { + for (int i = MTLGPUFamilyMetal3 + 5; i >= MTLGPUFamilyMetal3; --i) { + if ([ctx->device supportsFamily:i]) { + GGML_METAL_LOG_INFO("%s: GPU family: MTLGPUFamilyMetal%d (%d)\n", __func__, i - (int) MTLGPUFamilyMetal3 + 3, i); + break; + } + } + } } ctx->support_simdgroup_reduction = [ctx->device supportsFamily:MTLGPUFamilyApple7]; + if (@available(macOS 13.0, iOS 16.0, *)) { + ctx->support_simdgroup_reduction |= [ctx->device supportsFamily:MTLGPUFamilyMetal3]; + } + + GGML_METAL_LOG_INFO("%s: simdgroup reduction support = %s\n", __func__, ctx->support_simdgroup_reduction ? "true" : "false"); GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6); if (ctx->device.maxTransferRate != 0) {