From 5ba37461711095c0284233dbd14f0d9010cdbf56 Mon Sep 17 00:00:00 2001
From: Xiao-Yong Jin <jinxiaoyong@gmail.com>
Date: Fri, 3 Nov 2023 13:00:31 -0500
Subject: [PATCH 1/2] ggml-metal: fix yarn rope (#3937)

---
 ggml-metal.m | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index b33a3cb8f..acdb83843 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1403,7 +1403,8 @@ void ggml_metal_graph_compute(
                             const int n_past     = ((int32_t *) dst->op_params)[0];
                             const int n_dims     = ((int32_t *) dst->op_params)[1];
                             const int mode       = ((int32_t *) dst->op_params)[2];
-                            const int n_orig_ctx = ((int32_t *) dst->op_params)[3];
+                            // skip 3, n_ctx, used in GLM RoPE, unimplemented in metal
+                            const int n_orig_ctx = ((int32_t *) dst->op_params)[4];
 
                             float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
                             memcpy(&freq_base,   (int32_t *) dst->op_params +  5, sizeof(float));

From d9b33fe95bd257b36c84ee5769cc048230067d6f Mon Sep 17 00:00:00 2001
From: Peter Sugihara <peter@campsh.com>
Date: Fri, 3 Nov 2023 12:18:18 -0700
Subject: [PATCH 2/2] metal : round up to 16 to fix
 MTLDebugComputeCommandEncoder assertion (#3938)

---
 ggml-metal.m | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index acdb83843..78ae4485d 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1017,7 +1017,7 @@ void ggml_metal_graph_compute(
                             [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:2];
                             [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:3];
                             [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:nth/32*sizeof(float) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:MAX(16, nth/32*sizeof(float)) atIndex:0];
 
                             [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                         } break;
@@ -1348,7 +1348,7 @@ void ggml_metal_graph_compute(
                             [encoder setBytes:&ne00    length:sizeof( int64_t) atIndex:2];
                             [encoder setBytes:&nb01    length:sizeof(uint64_t) atIndex:3];
                             [encoder setBytes:&eps     length:sizeof(   float) atIndex:4];
-                            [encoder setThreadgroupMemoryLength:nth*sizeof(float) atIndex:0];
+                            [encoder setThreadgroupMemoryLength:MAX(16, nth*sizeof(float)) atIndex:0];
 
                             const int64_t nrows = ggml_nrows(src0);