From 6f33852f3d397ee83759668fd031bc14eec8e575 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Tue, 2 Apr 2024 16:08:55 +0200
Subject: [PATCH] ggml : tidy mul_mat_id asserts, report CUDA error via CUDA_CHECK

---
 ggml-cuda.cu |  2 +-
 ggml.c       | 22 ++++++----------------
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 409b62cad..f51b2042d 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -2209,7 +2209,7 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
     cudaError_t err = cudaGetLastError();
     if (err != cudaSuccess) {
         fprintf(stderr, "%s: %s failed\n", __func__, ggml_op_desc(dst));
-        GGML_ASSERT(false);
+        CUDA_CHECK(err);
     }
 
     return true;
diff --git a/ggml.c b/ggml.c
index 3e87bedc9..c9b0a6a0e 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4573,6 +4573,8 @@ void ggml_mul_mat_set_prec(
 
 // ggml_mul_mat_id
 
+// NOTE: the id parameter will be removed in the future; instead, all the experts listed in ids will be computed.
+//       This will allow computing all the used experts in a single matrix multiplication.
 struct ggml_tensor * ggml_mul_mat_id(
         struct ggml_context * ctx,
         struct ggml_tensor  * as,
@@ -4581,12 +4583,11 @@ struct ggml_tensor * ggml_mul_mat_id(
         struct ggml_tensor  * b) {
 
     GGML_ASSERT(ids->type == GGML_TYPE_I32);
-    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1);
-    GGML_ASSERT(ids->ne[1] == b->ne[1]);
+    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
+    GGML_ASSERT(ids->ne[1] == b->ne[1]); // must have an expert per b row
     GGML_ASSERT(ids->ne[2] == b->ne[2] && ids->ne[3] == b->ne[3]);
-    //GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
-    GGML_ASSERT(id >= 0 && id < ids->ne[0]);
-    // TODO: restore checks
+    GGML_ASSERT(id >= 0 && id < ids->ne[0]); // valid id
+    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
 
     bool is_node = false;
 
@@ -4605,14 +4606,6 @@ struct ggml_tensor * ggml_mul_mat_id(
     result->src[1] = b;
     result->src[2] = ids;
 
-    //for (int i = 0; i < n_as; i++) {
-    //    struct ggml_tensor * a = as[i];
-    //    GGML_ASSERT(ggml_are_same_shape(as[0], a));
-    //    GGML_ASSERT(ggml_can_mul_mat(a, b));
-    //    GGML_ASSERT(!ggml_is_transposed(a));
-    //    result->src[i + 2] = a;
-    //}
-
     return result;
 }
 
@@ -10980,9 +10973,6 @@ static void ggml_compute_forward_mul_mat_id(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    // broadcast factors
-    //const int64_t r2 = ne12/ne02;
-    //const int64_t r3 = ne13/ne03;
     // broadcast is not supported with mmid
     assert(ne12 == 1);
     assert(ne13 == 1);