add review note

2024-04-03 02:10:43 +02:00 · 2024-04-03 02:10:43 +02:00 · 19dafafd5f
commit 19dafafd5f
parent a1343aeb8a
1 changed files with 5 additions and 1 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -13575,7 +13575,11 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                        imatrix = it->second.data();
                    } else {
                        LLAMA_LOG_INFO("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
-                                int(it->second.size()), int(tensor->ne[0]), tensor->name);
+                                int(it->second.size()), int(tensor->ne[0]*tensor->ne[2]), tensor->name);
+                        // REVIEW: this can happen when quantizing an old mixtral model with split tensors with a new incompatible imatrix
+                        //         this is a significant error and it may be a good idea to abort the process if this happens,
+                        //         since many people will miss the error and not realize that most of the model is being quantized without an imatrix
+                        //         tok_embd should be ignored in this case, since it always causes this warning
                    }
                }
            }