convert_hf_to_gguf: rwkv tokenizer: Don't escape sequences manually

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
2024-08-12 09:08:30 +08:00 · 2024-08-12 09:08:30 +08:00 · 7f2e370fa2
commit 7f2e370fa2
parent 18decea3ed
1 changed files with 1 additions and 3 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -2737,9 +2737,7 @@ class RwkvModel(Model):
                token = token.encode("utf-8") if isinstance(token, str) else token
                assert isinstance(token, bytes)
                assert len(token) == token_len
-                token_text: str = ""
+                token_text: str = str(token)[2:-1]
-                for b in token:
-                    token_text += f"\\x{b:02x}"
                tokens.append(token_text.encode("utf-8"))
                toktypes.append(gguf.TokenType.NORMAL)
        remainder = vocab_size - len(tokens)