convert-hf : for T5 skip both decoder.embed_tokens and encoder.embed_tokens tensors (they are duplicates of the shared tensor)

Stanisław Szymczyk 2024-06-23 15:52:54 +02:00
parent 47a0a0cdff
commit 98931f87d4
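
For context, Hugging Face T5 checkpoints tie the encoder and decoder input embeddings to the shared embedding matrix, so "encoder.embed_tokens.weight" and "decoder.embed_tokens.weight" carry no information beyond "shared.weight". A minimal way to verify this, assuming the torch and transformers packages are installed and using "google/flan-t5-small" purely as an illustrative model id:

import torch
from transformers import T5ForConditionalGeneration

model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
sd = model.state_dict()

# The tied embedding tensors are equal to "shared.weight".
print(torch.equal(sd["shared.weight"], sd["encoder.embed_tokens.weight"]))  # True
print(torch.equal(sd["shared.weight"], sd["decoder.embed_tokens.weight"]))  # True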


@@ -2837,9 +2837,10 @@ class T5Model(Model):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        # flan-t5-xxl contains "decoder.embed_tokens.weight" tensor that is the same as "shared.weight" tensor
-        # To prevent errors caused by an unnecessary unmapped tensor, skip "decoder.embed_tokens.weight".
-        if name == "decoder.embed_tokens.weight":
+        # Sometimes T5 and Flan-T5 based models contain "encoder.embed_tokens.weight" tensor or
+        # "decoder.embed_tokens.weight" tensors that are duplicates of "shared.weight" tensor
+        # To prevent errors caused by an unnecessary unmapped tensor, skip both of them and use only "shared.weight".
+        if name == "decoder.embed_tokens.weight" or name == "encoder.embed_tokens.weight":
             logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
             return []
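
Taken together, the change amounts to skipping both duplicate tensor names instead of only the decoder one. A self-contained sketch of the resulting logic, assuming torch for the Tensor type and a module-level logger (in the real convert-hf-to-gguf.py this is a method on the T5Model class, and kept tensors go through the model's GGUF name mapping rather than passing through unchanged):

import logging
from typing import Iterable

from torch import Tensor

logger = logging.getLogger(__name__)


def modify_tensors(data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
    del bid  # unused

    # Both embed_tokens tensors duplicate "shared.weight" and have no GGUF
    # mapping; returning an empty list drops them from the converted output.
    if name in ("decoder.embed_tokens.weight", "encoder.embed_tokens.weight"):
        logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
        return []

    # Every other tensor is kept (the real method maps `name` to its GGUF
    # equivalent at this point).
    return [(name, data_torch)]

The tuple membership test is behaviorally identical to the chained == comparison in the diff; it is used here only to keep the sketch compact.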