From 57b79fda88f95ef953cf90c1ec74b48ef5252b78 Mon Sep 17 00:00:00 2001 From: chentyjpm <317974925@qq.com> Date: Wed, 14 Aug 2024 14:34:47 +0800 Subject: [PATCH] convert_hf_to_gguf: add q4_0, q4_1, q5_0 and q5_1 output types --- convert_hf_to_gguf.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 550dd5cfd..74f2f5abf 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -311,6 +311,14 @@ class Model: data_qtype = gguf.GGMLQuantizationType.BF16 elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0: data_qtype = gguf.GGMLQuantizationType.Q8_0 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0: + data_qtype = gguf.GGMLQuantizationType.Q4_0 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1: + data_qtype = gguf.GGMLQuantizationType.Q4_1 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_0: + data_qtype = gguf.GGMLQuantizationType.Q5_0 + elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1: + data_qtype = gguf.GGMLQuantizationType.Q5_1 else: raise ValueError(f"Unknown file type: {self.ftype.name}") @@ -3815,8 +3823,10 @@ def parse_args() -> argparse.Namespace: help="path to write to; default: based on input.
{ftype} will be replaced by the outtype.", ) parser.add_argument( - "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16", - help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", + "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "auto"], default="f16", + help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, " + "q8_0 for Q8_0, q4_0 for Q4_0, q4_1 for Q4_1, q5_0 for Q5_0, q5_1 for Q5_1," + " auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", ) parser.add_argument( "--bigendian", action="store_true", @@ -3903,6 +3913,10 @@ def main() -> None: "f16": gguf.LlamaFileType.MOSTLY_F16, "bf16": gguf.LlamaFileType.MOSTLY_BF16, "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0, + "q4_0": gguf.LlamaFileType.MOSTLY_Q4_0, + "q4_1": gguf.LlamaFileType.MOSTLY_Q4_1, + "q5_0": gguf.LlamaFileType.MOSTLY_Q5_0, + "q5_1": gguf.LlamaFileType.MOSTLY_Q5_1, "auto": gguf.LlamaFileType.GUESSED, }