falcon : minor changes (still chasing the Metal problem)

2023-08-23 12:25:49 +03:00 · 2023-08-23 12:25:49 +03:00 · e2d23bed1b
commit e2d23bed1b
parent a0dc47a501
3 changed files with 51 additions and 52 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -992,7 +992,9 @@ void ggml_metal_graph_compute(
                            [encoder setBytes:&nb2  length:sizeof(uint64_t) atIndex:16];
                            [encoder setBytes:&nb3  length:sizeof(uint64_t) atIndex:17];
                            [encoder setBytes:&m0  length:sizeof(    float) atIndex:18];
+
                            const int nth = 32;
+
                            [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                        } break;
                    case GGML_OP_ROPE:
--- a/llama.cpp
+++ b/llama.cpp
@@ -2545,26 +2545,23 @@ static struct ggml_cgraph * llm_build_falcon(
            struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
            ggml_set_name(KQV_merged, "KQV_merged");

-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
+            cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
            ggml_set_name(cur, "KQV_merged_contiguous");

-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
-
            cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
            ggml_set_name(cur, "result_wo");
        }

+        struct ggml_tensor * attn_out = cur;
+
+        // feed forward
+        {
            struct ggml_tensor * inpFF = attn_norm;
-        struct ggml_tensor * attn_out = ggml_cpy(
-            ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));

            cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
            cur = ggml_gelu(ctx0, cur);
            cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur);
+        }

        cur = ggml_add(ctx0, cur, attn_out);
        cur = ggml_add(ctx0, cur, inpL);