This commit is contained in:
Robert Sinclair 2024-08-08 08:28:15 -04:00 committed by GitHub
commit e69b098133
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -63,6 +63,7 @@ class Model:
model_name: str | None model_name: str | None
metadata_override: Path | None metadata_override: Path | None
dir_model_card: Path dir_model_card: Path
z: bool
# subclasses should define this! # subclasses should define this!
model_arch: gguf.MODEL_ARCH model_arch: gguf.MODEL_ARCH
@ -70,7 +71,7 @@ class Model:
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False, def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
use_temp_file: bool = False, eager: bool = False, use_temp_file: bool = False, eager: bool = False,
metadata_override: Path | None = None, model_name: str | None = None, metadata_override: Path | None = None, model_name: str | None = None,
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False): split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False, z: bool = False):
if type(self) is Model: if type(self) is Model:
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
@ -92,6 +93,7 @@ class Model:
self.metadata_override = metadata_override self.metadata_override = metadata_override
self.model_name = model_name self.model_name = model_name
self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py self.dir_model_card = dir_model # overridden in convert_lora_to_gguf.py
self.z = z
# Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
if self.ftype == gguf.LlamaFileType.GUESSED: if self.ftype == gguf.LlamaFileType.GUESSED:
@ -319,7 +321,7 @@ class Model:
assert data.dtype == np.uint16 assert data.dtype == np.uint16
data_qtype = gguf.GGMLQuantizationType.BF16 data_qtype = gguf.GGMLQuantizationType.BF16
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data): elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data) and (self.z and new_name not in (self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD))):
data = gguf.quantize_q8_0(data) data = gguf.quantize_q8_0(data)
assert data.dtype == np.uint8 assert data.dtype == np.uint8
data_qtype = gguf.GGMLQuantizationType.Q8_0 data_qtype = gguf.GGMLQuantizationType.Q8_0
@ -3732,6 +3734,10 @@ def parse_args() -> argparse.Namespace:
"--metadata", type=Path, "--metadata", type=Path,
help="Specify the path for an authorship metadata override file" help="Specify the path for an authorship metadata override file"
) )
parser.add_argument(
"--z", action="store_true",
help="Keep output and embed tensors at F16"
)
return parser.parse_args() return parser.parse_args()
@ -3806,7 +3812,8 @@ def main() -> None:
metadata_override=args.metadata, model_name=args.model_name, metadata_override=args.metadata, model_name=args.model_name,
split_max_tensors=args.split_max_tensors, split_max_tensors=args.split_max_tensors,
split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
small_first_shard=args.no_tensor_first_split) small_first_shard=args.no_tensor_first_split,
z=args.z)
if args.vocab_only: if args.vocab_only:
logger.info("Exporting model vocab...") logger.info("Exporting model vocab...")