hf bitnet e2e v2
This commit is contained in:
parent
076b4a197b
commit
57dfc3bcdf
2 changed files with 14 additions and 5 deletions
|
@ -1407,9 +1407,20 @@ class BitnetModel(Model):
|
||||||
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
|
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
|
||||||
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
|
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
|
||||||
|
|
||||||
# def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
def weight_quant(self, weight):
    """Fake-quantize *weight* to ternary values (BitNet absmean scheme).

    The tensor is scaled by the reciprocal of its mean absolute value,
    rounded to the nearest integer, clamped to [-1, 1], then divided by
    the same scale so the result lives in the original value range.
    The input's dtype is preserved on the returned tensor.
    """
    original_dtype = weight.dtype
    as_float = weight.float()
    # Per-tensor scale; clamping the mean avoids division by zero.
    mean_abs = as_float.abs().mean().clamp(min=1e-5)
    scale = 1.0 / mean_abs
    ternary = (as_float * scale).round().clamp(min=-1, max=1)
    return (ternary / scale).type(original_dtype)
|
||||||
|
|
||||||
# return [(self.map_tensor_name(name), data_torch)]
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
if name.endswith(("q_proj.weight", "k_proj.weight", "v_proj.weight",
|
||||||
|
"down_proj.weight", "up_proj.weight", "gate_proj.weight",
|
||||||
|
"o_proj.weight")):
|
||||||
|
data_torch = data_torch + (self.weight_quant(data_torch) - data_torch).detach()
|
||||||
|
|
||||||
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
@Model.register("GrokForCausalLM")
|
@Model.register("GrokForCausalLM")
|
||||||
class GrokModel(Model):
|
class GrokModel(Model):
|
||||||
|
|
|
@ -6833,8 +6833,6 @@ static struct ggml_tensor * llm_build_qbitlinear(
|
||||||
struct ggml_tensor * cur)
|
struct ggml_tensor * cur)
|
||||||
{
|
{
|
||||||
return ggml_bitlinear_quant(ctx, cur);
|
return ggml_bitlinear_quant(ctx, cur);
|
||||||
|
|
||||||
return cur;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_tensor * llm_build_ffn(
|
static struct ggml_tensor * llm_build_ffn(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue