convert_hf_to_gguf: rwkv tokenizer: Don't escape sequences manually

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
Molly Sophia 2024-08-12 09:08:30 +08:00
parent 18decea3ed
commit 7f2e370fa2

View file

@@ -2737,9 +2737,7 @@ class RwkvModel(Model):
 token = token.encode("utf-8") if isinstance(token, str) else token
 assert isinstance(token, bytes)
 assert len(token) == token_len
-token_text: str = ""
-for b in token:
-    token_text += f"\\x{b:02x}"
+token_text: str = str(token)[2:-1]
 tokens.append(token_text.encode("utf-8"))
 toktypes.append(gguf.TokenType.NORMAL)
 remainder = vocab_size - len(tokens)