Allow quantizing k-quants to fall back when the tensor size is incompatible
This commit is contained in:
parent
34b2a5e1ee
commit
7f20d78e7e
1 changed file with 9 additions and 9 deletions
18
llama.cpp
18
llama.cpp
|
@ -8133,20 +8133,20 @@ static ggml_type get_k_quant_type(
|
||||||
int nx = tensor->ne[0];
|
int nx = tensor->ne[0];
|
||||||
int ny = tensor->ne[1];
|
int ny = tensor->ne[1];
|
||||||
if (nx % QK_K != 0) {
|
if (nx % QK_K != 0) {
|
||||||
LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for k-quants\n", __func__, nx, ny, QK_K);
|
LLAMA_LOG_WARN("\n\n%s : tensor cols %d x %d are not divisible by %d, required for %s", __func__, nx, ny, QK_K, ggml_type_name(new_type));
|
||||||
convert_incompatible_tensor = true;
|
convert_incompatible_tensor = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (convert_incompatible_tensor) {
|
if (convert_incompatible_tensor) {
|
||||||
if (name == tn(LLM_TENSOR_OUTPUT, "weight")) {
|
switch (new_type) {
|
||||||
new_type = GGML_TYPE_F16; //fall back to F16 instead of just failing.
|
case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
|
||||||
LLAMA_LOG_WARN("F16 will be used for this tensor instead.\n");
|
case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
|
||||||
} else if (name == tn(LLM_TENSOR_TOKEN_EMBD, "weight")) {
|
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|
||||||
new_type = GGML_TYPE_Q4_0; //fall back to Q4_0 instead of just failing.
|
case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break;
|
||||||
LLAMA_LOG_WARN("Q4_0 will be used for this tensor instead.\n");
|
case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
|
||||||
} else {
|
default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
|
||||||
throw std::runtime_error("Unsupported tensor size encountered\n");
|
|
||||||
}
|
}
|
||||||
|
LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new_type;
|
return new_type;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue