diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 81f6e76e2..db053e3b8 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
                 CUBLAS_GEMM_DEFAULT_TENSOR_OP));
     } else {
         // use cublasGemmBatchedEx
+        // TODO: resolve the review discussion at https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000
         const int ne23 = ne12*ne13;
 
         // TODO: avoid this alloc