diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index d2180d9c2..21d053e71 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -872,7 +872,7 @@ bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
     if ((src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) &&
         src1->type == GGML_TYPE_F32 &&
         dst->type == GGML_TYPE_F32 &&
-        (src0->backend == GGML_BACKEND_CL)) {
+        ((GetQuantsUnshuffled() && ne0 >= 32 && ne1 >= 32 && ne10 >= 32) || src0->backend == GGML_BACKEND_CL)) {
         return true;
     }
 
diff --git a/ggml_v2.c b/ggml_v2.c
index a6ed99e54..de2afd66e 100644
--- a/ggml_v2.c
+++ b/ggml_v2.c
@@ -1571,11 +1571,16 @@ static void ggml_vec_dot_q5_0_q8_0_v2(const int n, float * restrict s, const voi
 static void ggml_vec_dot_q5_1_q8_1_v2(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
 static void ggml_vec_dot_q8_0_q8_0_v2(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
 
+// Stores `unshuffle` in the file-level `quants_unshuffled` flag.
+// Deliberately a plain external definition (no bare `inline`): the companion
+// getter is called from another translation unit (ggml-opencl.cpp), and a
+// C99/C11 inline definition does not by itself provide a symbol to link against.
 void SetQuantsUnshuffled(bool unshuffle)
 {
     quants_unshuffled = unshuffle;
 }
+// Returns the current value of the file-level `quants_unshuffled` flag.
 bool GetQuantsUnshuffled()
 {
     return quants_unshuffled;
 }