convert_hf_to_gguf: rwkv: Avoid using `eval`
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
parent
8bc1f9ae80
commit
18decea3ed
1 changed file with 8 additions and 5 deletions
|
@@ -3,6 +3,7 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ast
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
import contextlib
|
import contextlib
|
||||||
|
@@ -2730,12 +2731,14 @@ class RwkvModel(Model):
|
||||||
with open(self.dir_model / "rwkv_vocab_v20230424.txt", "r", encoding="utf-8") as f:
|
with open(self.dir_model / "rwkv_vocab_v20230424.txt", "r", encoding="utf-8") as f:
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
for line in lines:
|
for line in lines:
|
||||||
x = eval(line[line.index(' '):line.rindex(' ')])
|
parts = line.split(' ')
|
||||||
x = x.encode("utf-8") if isinstance(x, str) else x
|
assert len(parts) >= 3
|
||||||
assert isinstance(x, bytes)
|
_, token, token_len = int(parts[0]), ast.literal_eval(' '.join(parts[1:-1])), int(parts[-1])
|
||||||
assert len(x) == int(line[line.rindex(' '):])
|
token = token.encode("utf-8") if isinstance(token, str) else token
|
||||||
|
assert isinstance(token, bytes)
|
||||||
|
assert len(token) == token_len
|
||||||
token_text: str = ""
|
token_text: str = ""
|
||||||
for b in x:
|
for b in token:
|
||||||
token_text += f"\\x{b:02x}"
|
token_text += f"\\x{b:02x}"
|
||||||
tokens.append(token_text.encode("utf-8"))
|
tokens.append(token_text.encode("utf-8"))
|
||||||
toktypes.append(gguf.TokenType.NORMAL)
|
toktypes.append(gguf.TokenType.NORMAL)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue