diff --git a/src/llama.cpp b/src/llama.cpp index 3edaa98e8..e65b2ebf9 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -16754,6 +16754,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s quantize &= name.find("ssm_x.weight") == std::string::npos; quantize &= name.find("ssm_dt.weight") == std::string::npos; + // do not quantize DeepSeek-V2's low-rank attention weights + // NOTE: this will have O(((w-q)^2)^2) rate-distortion otherwise (4th-power quantization error!) + quantize &= name.find("attn_q_a.weight") == std::string::npos; + quantize &= name.find("attn_q_b.weight") == std::string::npos; + quantize &= name.find("attn_kv_a_mqa.weight") == std::string::npos; + quantize &= name.find("attn_kv_b.weight") == std::string::npos; + enum ggml_type new_type; void * new_data; size_t new_size;