llama : quantize more Mamba tensors
* llama : use f16 as the fallback of fallback quant types
parent 5c0f108e15
commit 3491291a32

2 changed files with 4 additions and 19 deletions
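The second bullet refers to the fallback chain in llama_tensor_get_type: when a tensor's row length is not compatible with the requested quant type, a reduced quant type is picked; with this change, if even that reduced type's block size does not divide the row length, the tensor is stored as F16. A minimal sketch of the idea in Python (the block sizes are ggml's actual values, but the function and table here are illustrative, not part of the codebase):

# Sketch of the "fallback of the fallback": if the reduced quant type still does
# not fit the row, drop to F16 (block size 1), which always fits.
BLOCK_SIZE = {
    "Q4_K": 256, "Q5_K": 256, "Q6_K": 256,   # k-quants: 256-element blocks
    "Q4_0": 32, "Q5_0": 32, "Q8_0": 32,      # legacy quants: 32-element blocks
    "F16": 1, "F32": 1,                      # unconstrained
}

def final_type(fallback_type: str, row_len: int) -> str:
    # mirrors: if (tensor->ne[0] % ggml_blck_size(new_type) != 0) new_type = GGML_TYPE_F16;
    if row_len % BLOCK_SIZE[fallback_type] != 0:
        return "F16"
    return fallback_type

print(final_type("Q8_0", 48))    # -> F16   (48 is not a multiple of 32)
print(final_type("Q8_0", 1536))  # -> Q8_0  (1536 is a multiple of 32)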
@@ -295,6 +295,7 @@ class Model:
                     gguf.MODEL_TENSOR.FFN_GATE_INP,
                     gguf.MODEL_TENSOR.POS_EMBD,
                     gguf.MODEL_TENSOR.TOKEN_TYPES,
+                    gguf.MODEL_TENSOR.SSM_CONV1D,
                 )
             )
             or not name.endswith(".weight")
@@ -2786,23 +2787,6 @@ class MambaModel(Model):
 
         return [(new_name, data_torch)]
 
-    def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
-        if bid is not None and new_name in (
-            self.format_tensor_name(
-                n, bid, ".weight" if name.endswith(".weight") else ""
-            )
-            for n in [
-                gguf.MODEL_TENSOR.SSM_CONV1D,
-                gguf.MODEL_TENSOR.SSM_X,
-                gguf.MODEL_TENSOR.SSM_DT,
-                gguf.MODEL_TENSOR.SSM_A,
-                gguf.MODEL_TENSOR.SSM_D,
-            ]
-        ):
-            return gguf.GGMLQuantizationType.F32
-
-        return super().tensor_force_quant(name, new_name, bid, n_dims)
-
 
 @Model.register("CohereForCausalLM")
 class CommandR2Model(Model):
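With SSM_CONV1D added to the always-F32 set in the base Model class (first hunk), the MambaModel-specific tensor_force_quant override removed here becomes redundant: the conv1d weight keeps being written as F32, and the remaining SSM tensors fall through to the generic rules. A rough, simplified sketch of that generic decision in Python (the helper below stands in for the real tensor_force_quant / match_model_tensor_name machinery, and the tensor names are only examples):

# Simplified view of the base-class behaviour after this change (not the real method).
ALWAYS_F32 = {"FFN_GATE_INP", "POS_EMBD", "TOKEN_TYPES", "SSM_CONV1D"}  # SSM_CONV1D is new

def forced_type(tensor_kind: str, name: str, n_dims: int) -> str | None:
    if n_dims <= 1 or name.endswith("_norm.weight"):
        return "F32"                                  # 1D tensors and norms stay F32
    if tensor_kind in ALWAYS_F32 or not name.endswith(".weight"):
        return "F32"
    return None                                       # let the requested quant type apply

print(forced_type("SSM_CONV1D", "backbone.layers.0.mixer.conv1d.weight", 2))  # F32
print(forced_type("SSM_X",      "backbone.layers.0.mixer.x_proj.weight", 2))  # None -> quantizable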
@@ -16122,6 +16122,9 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break;
             default: throw std::runtime_error("\nUnsupported tensor size encountered\n");
         }
+        if (tensor->ne[0] % ggml_blck_size(new_type) != 0) {
+            new_type = GGML_TYPE_F16;
+        }
         LLAMA_LOG_WARN(" - using fallback quantization %s\n", ggml_type_name(new_type));
         ++qs.n_fallback;
     }
@@ -16450,8 +16453,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // do not quantize Mamba's small yet 2D weights
         // NOTE: can't use LLM_TN here because the layer number is not known
         quantize &= name.find("ssm_conv1d.weight") == std::string::npos;
-        quantize &= name.find("ssm_x.weight") == std::string::npos;
-        quantize &= name.find("ssm_dt.weight") == std::string::npos;
 
         // do not quantize relative position bias (T5)
         quantize &= name.find("attn_rel_b.weight") == std::string::npos;
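Why ssm_conv1d.weight stays on the skip list while ssm_x.weight and ssm_dt.weight can come off it: the row lengths differ a lot. A small worked example, assuming typical Mamba shapes (e.g. mamba-130m with d_model = 768, d_inner = 1536, d_conv = 4, dt_rank = ceil(d_model / 16) = 48; these numbers are illustrative and not taken from this diff):

# Row length (ne[0]) decides whether any quant block fits.
rows = {
    "ssm_conv1d.weight": 4,     # d_conv: too short for any block, forced to F32 at conversion
    "ssm_x.weight":      1536,  # d_inner: multiple of 256, quantizes like a normal matrix
    "ssm_dt.weight":     48,    # dt_rank: fits no block size, can now fall back to F16
}
for name, ne0 in rows.items():
    if ne0 % 256 == 0:
        kind = "k-quants ok"
    elif ne0 % 32 == 0:
        kind = "legacy quants ok"
    else:
        kind = "falls back to F16 (or stays F32)"
    print(f"{name}: ne[0]={ne0} -> {kind}")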