From 81b57bb37599fea3a2c2806ac37d4fcf39bc5383 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin <git@compilade.net>
Date: Wed, 31 Jan 2024 08:47:53 -0500
Subject: [PATCH] mamba : fix self-overlapping view depth stride

---
 llama.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index cf41d69be..f064969d2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7960,7 +7960,7 @@ struct llm_build_context {
                 const size_t conv_x_nb1 = (d_conv - 1 + n_tok) * ggml_element_size(conv_x);
 
                 conv_x = ggml_set_2d(ctx0, conv_x, conv_state, conv_x_nb1, 0);
-                // unfortunately, making x contiguous is necessary because ggml_set expects nb0 == sizeof(float)
+                // making x contiguous is necessary because ggml_set expects it
                 conv_x = ggml_set_2d(ctx0, conv_x, ggml_cont(ctx0, ggml_transpose(ctx0, x)), conv_x_nb1, (d_conv - 1)*ggml_element_size(conv_x));
 
                 // store last (d_conv - 1) columns of conv_x back into the KV cache for the next conv_state
@@ -7969,9 +7969,10 @@ struct llm_build_context {
                         ggml_view_2d(ctx0, conv_x, d_conv - 1, d_inner, conv_x_nb1, n_tok*ggml_element_size(conv_x)),
                         ggml_view_tensor(ctx0, kv_self.k_l[il])));
 
-                // prepare convolution for all tokens in the batch with a self-overlapping view
+                // prepare convolution for all tokens in the batch with a self-overlapping view,
+                // shifting by one column each ... depth? ... with a window of d_conv columns.
                 // {(d_conv-1)+n_tok, d_inner} => {d_conv, d_inner, n_tok}
-                conv_x = ggml_view_3d(ctx0, conv_x, d_conv, d_inner, n_tok, conv_x_nb1, -(d_conv - 1)*d_inner*ggml_element_size(conv_x), 0);
+                conv_x = ggml_view_3d(ctx0, conv_x, d_conv, d_inner, n_tok, conv_x_nb1, 1*ggml_element_size(conv_x), 0);
 
                 // perform convolution
                 // => {1, d_inner, n_tok}