Add q4_0, q4_1, q5_0 and q5_1 as output formats for the HF-to-GGUF conversion script

This commit is contained in:
chentyjpm 2024-08-14 14:34:47 +08:00
parent 43bdd3ce18
commit 57b79fda88

View file

@@ -311,6 +311,14 @@ class Model:
data_qtype = gguf.GGMLQuantizationType.BF16 data_qtype = gguf.GGMLQuantizationType.BF16
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0: elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
data_qtype = gguf.GGMLQuantizationType.Q8_0 data_qtype = gguf.GGMLQuantizationType.Q8_0
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0:
data_qtype = gguf.GGMLQuantizationType.Q4_0
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1:
data_qtype = gguf.GGMLQuantizationType.Q4_1
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_0:
data_qtype = gguf.GGMLQuantizationType.Q5_0
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1:
data_qtype = gguf.GGMLQuantizationType.Q5_1
else: else:
raise ValueError(f"Unknown file type: {self.ftype.name}") raise ValueError(f"Unknown file type: {self.ftype.name}")
@@ -3815,8 +3823,10 @@ def parse_args() -> argparse.Namespace:
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.", help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
) )
parser.add_argument( parser.add_argument(
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16", "--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "auto"], default="f16",
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type", help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, "
"q8_0 for Q8_0, q4_0 for Q4_0, q4_1 for Q4_1, q5_0 for Q5_0, q5_1 for Q5_1,"
" auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
) )
parser.add_argument( parser.add_argument(
"--bigendian", action="store_true", "--bigendian", action="store_true",
@@ -3903,6 +3913,10 @@ def main() -> None:
"f16": gguf.LlamaFileType.MOSTLY_F16, "f16": gguf.LlamaFileType.MOSTLY_F16,
"bf16": gguf.LlamaFileType.MOSTLY_BF16, "bf16": gguf.LlamaFileType.MOSTLY_BF16,
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0, "q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
"q4_0": gguf.LlamaFileType.MOSTLY_Q4_0,
"q4_1": gguf.LlamaFileType.MOSTLY_Q4_1,
"q5_0": gguf.LlamaFileType.MOSTLY_Q5_0,
"q5_1": gguf.LlamaFileType.MOSTLY_Q5_1,
"auto": gguf.LlamaFileType.GUESSED, "auto": gguf.LlamaFileType.GUESSED,
} }