diff --git a/llama.cpp b/llama.cpp index 28de3c741..71a0b8e89 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8463,8 +8463,18 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty } } else if (name.find("ffn_down") != std::string::npos) { const int n_expert = std::max(1, (int)qs.model.hparams.n_expert); - const int i_layer = qs.i_feed_forward_w2 / n_expert; - const int n_layer = qs.i_feed_forward_w2 / n_expert; + int i_layer, n_layer; + if (n_expert == 1) { + i_layer = qs.i_feed_forward_w2; + n_layer = qs.n_feed_forward_w2; + } else { + // Believe it or not, "experts" in the FFN of Mixtral-8x7B are not consecutive, but occasionally randomly + // sprinkled in the model. Hence, simply dividing i_feed_forward_w2 by n_expert does not work + // for getting the current layer as I initially thought, and we need to resort to parsing the + // tensor name. + n_layer = qs.n_feed_forward_w2 / n_expert; + sscanf(name.c_str(), "blk.%d.ffn_down", &i_layer); + } if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K; else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S) { if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;