From 658c686e5abdbe2935661aa6bb652d182699f61b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sat, 29 Apr 2023 14:26:36 +0300
Subject: [PATCH] ggml : add asserts to guard against incorrect wsize

---
 Makefile |  9 +++++++--
 ggml.c   | 10 ++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index fd695d7dd..4516e8556 100644
--- a/Makefile
+++ b/Makefile
@@ -34,10 +34,15 @@ endif
 #
 
 # keep standard at C11 and C++11
-CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+CFLAGS   = -I.              -O3 -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
 LDFLAGS  =
 
+ifndef LLAMA_DEBUG
+	CFLAGS   += -DNDEBUG
+	CXXFLAGS += -DNDEBUG
+endif
+
 # warnings
 CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
 CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
diff --git a/ggml.c b/ggml.c
index 6d2fb8d61..0dc1939f6 100644
--- a/ggml.c
+++ b/ggml.c
@@ -8245,8 +8245,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
         ggml_fp16_t * d_X = ggml_cuda_pool_malloc(sizeof(float) * x_ne, &x_size);
         ggml_fp16_t * d_Y = ggml_cuda_pool_malloc(sizeof(float) * y_ne, &y_size);
         float       * d_D = ggml_cuda_pool_malloc(sizeof(float) * d_ne, &d_size);
-#else
-        float * const wdata = params->wdata;
 #endif
         for (int64_t i03 = 0; i03 < ne03; i03++) {
             for (int64_t i02 = 0; i02 < ne02; i02++) {
@@ -8263,8 +8261,11 @@ static void ggml_compute_forward_mul_mat_f16_f32(
                             wdata[id++] = GGML_FP32_TO_FP16(*(float *) ((char *) src1->data + i03*nb13 + i02*nb12 + i01*nb11 + i00*nb10));
                         }
                     }
+
+                    assert(id*sizeof(ggml_fp16_t) <= params->wsize);
                 }
 #else
+                float * const wdata = params->wdata;
                 {
                     size_t id = 0;
                     for (int64_t i01 = 0; i01 < ne01; ++i01) {
@@ -8272,6 +8273,8 @@ static void ggml_compute_forward_mul_mat_f16_f32(
                             wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
                         }
                     }
+
+                    assert(id*sizeof(float) <= params->wsize);
                 }
 #endif
 
@@ -8537,7 +8540,10 @@ static void ggml_compute_forward_mul_mat_q_f32(
                         dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00);
                         id += ne00;
                     }
+
+                    assert(id*sizeof(float) <= params->wsize);
                 }
+
                 const float * x = wdata;
 #endif