diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 81f6e76e2..db053e3b8 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -7149,6 +7149,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const CUBLAS_GEMM_DEFAULT_TENSOR_OP)); } else { // use cublasGemmBatchedEx + // TODO: https://github.com/ggerganov/llama.cpp/pull/3749#discussion_r1369997000 const int ne23 = ne12*ne13; // TODO: avoid this alloc