Allow quantizing k-quants to fall back when tensor size is incompatible

This commit is contained in:
KerfuffleV2 2023-10-23 09:24:24 -06:00
parent 34b2a5e1ee
commit 7f20d78e7e

View file

@@ -8133,20 +8133,20 @@ static ggml_type get_k_quant_type(
         int nx = tensor->ne[0];
         int ny = tensor->ne[1];
         if (nx % QK_K != 0) {
-            LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
+            LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for %s", __func__, nx, ny, QK_K, ggml_type_name(new_type));
             convert_incompatible_tensor = true;
         }
     }
     if (convert_incompatible_tensor) {
-        if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
-            new_type = GGML_TYPE_F16; //fall back to F16 instead of just failing.
-            LLAMA_LOG_WARN("F16 will be used for this tensor instead.\n");
-        } else if (name == tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
-            new_type = GGML_TYPE_Q4_0; //fall back to Q4_0 instead of just failing.
-            LLAMA_LOG_WARN("Q4_0 will be used for this tensor instead.\n");
-        } else {
-            throw std::runtime_error("Unsupported tensor size encountered\n");
+        switch (new_type) {
+            case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
+            case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
+            case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
+            case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
+            case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
+            default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
         }
+        LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
     }
     return new_type;