Merge 4adb77f7bc
into b69a480af4
This commit is contained in:
commit
2fab55ec0c
1 changed file with 29 additions and 2 deletions
|
@ -318,9 +318,30 @@ class Model:
|
||||||
data_qtype = gguf.GGMLQuantizationType.BF16
|
data_qtype = gguf.GGMLQuantizationType.BF16
|
||||||
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0:
|
||||||
data_qtype = gguf.GGMLQuantizationType.Q8_0
|
data_qtype = gguf.GGMLQuantizationType.Q8_0
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_0:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q4_0
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q4_1:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q4_1
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_0:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q5_0
|
||||||
|
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q5_1:
|
||||||
|
data_qtype = gguf.GGMLQuantizationType.Q5_1
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown file type: {self.ftype.name}")
|
raise ValueError(f"Unknown file type: {self.ftype.name}")
|
||||||
|
|
||||||
|
if data_qtype in [
|
||||||
|
gguf.GGMLQuantizationType.Q5_1, gguf.LlamaFileType.MOSTLY_Q5_0,
|
||||||
|
gguf.GGMLQuantizationType.Q4_1, gguf.LlamaFileType.MOSTLY_Q4_0,
|
||||||
|
]:
|
||||||
|
logger.warning("\n")
|
||||||
|
logger.warning("**************************************************************************************")
|
||||||
|
logger.warning("** WARNING: when quantizing to `Q4_0`, `Q4_1`, `Q5_0`, or `Q5_1`")
|
||||||
|
logger.warning("** is not equivalent to using `llama-quantize`")
|
||||||
|
logger.warning("** `llama-quantize` uses `Q4_K` and `Q6_K` for the token embeddings")
|
||||||
|
logger.warning("** but this code not")
|
||||||
|
logger.warning("**************************************************************************************")
|
||||||
|
logger.warning("\n")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = gguf.quants.quantize(data, data_qtype)
|
data = gguf.quants.quantize(data, data_qtype)
|
||||||
except gguf.QuantError as e:
|
except gguf.QuantError as e:
|
||||||
|
@ -4011,8 +4032,10 @@ def parse_args() -> argparse.Namespace:
|
||||||
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "auto"], default="f16",
|
"--outtype", type=str, choices=["f32", "f16", "bf16", "q8_0", "q4_0", "q4_1", "q5_0", "q5_1", "auto"], default="f16",
|
||||||
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, q8_0 for Q8_0, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, "
|
||||||
|
"q8_0 for Q8_0, limited: q4_0 for Q4_0, q4_1 for Q4_1, q5_0 for Q5_0, q5_1 for Q5_1,"
|
||||||
|
" auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--bigendian", action="store_true",
|
"--bigendian", action="store_true",
|
||||||
|
@ -4099,6 +4122,10 @@ def main() -> None:
|
||||||
"f16": gguf.LlamaFileType.MOSTLY_F16,
|
"f16": gguf.LlamaFileType.MOSTLY_F16,
|
||||||
"bf16": gguf.LlamaFileType.MOSTLY_BF16,
|
"bf16": gguf.LlamaFileType.MOSTLY_BF16,
|
||||||
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
|
"q8_0": gguf.LlamaFileType.MOSTLY_Q8_0,
|
||||||
|
"q4_0": gguf.LlamaFileType.MOSTLY_Q4_0,
|
||||||
|
"q4_1": gguf.LlamaFileType.MOSTLY_Q4_1,
|
||||||
|
"q5_0": gguf.LlamaFileType.MOSTLY_Q5_0,
|
||||||
|
"q5_1": gguf.LlamaFileType.MOSTLY_Q5_1,
|
||||||
"auto": gguf.LlamaFileType.GUESSED,
|
"auto": gguf.LlamaFileType.GUESSED,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue