diff --git a/ggml-cuda.cu b/ggml-cuda.cu index fe27bbac4..20dfd7561 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -7044,7 +7044,6 @@ inline void ggml_cuda_op_rope( GGML_ASSERT(false); rope_glm_f32_cuda(src0_dd, dst_dd, ne00, nrows, pos, freq_scale, ne01, freq_base, n_ctx, main_stream); } else if (is_neox) { - GGML_ASSERT(ne00 == n_dims && "ne00 != n_dims is not implemented for CUDA yet"); if (src0->type == GGML_TYPE_F32) { rope_neox_cuda( (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nrows, pos, freq_scale, ne01, freq_base, ext_factor,