From 013a08f72593de5c56ec7150ad7f36f79d2f63cf Mon Sep 17 00:00:00 2001
From: Douglas Hanley
Date: Wed, 24 Jul 2024 01:50:16 -0500
Subject: [PATCH] clean up and simplify XLMRoberta conversion

---
 convert_hf_to_gguf.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index b6075b828..69e9b534e 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2518,7 +2518,7 @@ class XLMRobertaModel(BertModel):
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
-        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size
 
         for token_id in range(tokenizer.vocab_size()):
             piece = tokenizer.IdToPiece(token_id)
@@ -2549,7 +2549,7 @@ class XLMRobertaModel(BertModel):
 
         # realign tokens (see HF tokenizer code)
         tokens = [b'<s>', b'<pad>', b'</s>', b'<unk>'] + tokens[3:-1]
-        scores = [0.0, -10000.0, 0.0, -10000.0] + scores[3:-1]
+        scores = [0.0, 0.0, 0.0, 0.0] + scores[3:-1]
         toktypes = [
             SentencePieceTokenTypes.CONTROL,
             SentencePieceTokenTypes.CONTROL,
@@ -2577,15 +2577,12 @@ class XLMRobertaModel(BertModel):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # position embeddings start at pad_token_id + 1, so just chop down the weight tensor
         if name == "embeddings.position_embeddings.weight":
-            del bid  # unused
-
             if self._position_offset is not None:
                 data_torch = data_torch[self._position_offset:,:]
 
-            return [(self.map_tensor_name(name), data_torch)]
-        return super().modify_tensors(data_torch, name, bid)
+        return super().modify_tensors(data_torch, name, bid)
 
 
 @Model.register("GemmaForCausalLM")
 class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
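
Note (illustration only, not part of the patch): the comment in modify_tensors says position embeddings start at pad_token_id + 1, which is why the converter can slice rows off embeddings.position_embeddings.weight. The sketch below mirrors that slicing with toy sizes; the pad_token_id value, tensor shapes, and the position_offset variable are assumptions for illustration, standing in for the _position_offset attribute set elsewhere in XLMRobertaModel.

    # HF RoBERTa/XLM-RoBERTa builds position ids as padding_idx + 1 + token_index,
    # so the first (pad_token_id + 1) rows of the position embedding matrix are
    # never indexed by real tokens and can be dropped during conversion.
    import torch

    pad_token_id = 1                    # typical XLM-RoBERTa value (assumption)
    position_offset = pad_token_id + 1  # rows 0..pad_token_id are unused

    max_positions, hidden = 514, 8      # toy sizes for the sketch
    weight = torch.randn(max_positions, hidden)

    # the same slicing the converter applies to the weight tensor
    trimmed = weight[position_offset:, :]
    print(weight.shape, "->", trimmed.shape)  # torch.Size([514, 8]) -> torch.Size([512, 8])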