Add special token handling to convert script
This commit is contained in:
parent
d9c0332323
commit
fa2cd7e7b9
1 changed file with 4 additions and 1 deletion
|
@@ -120,7 +120,10 @@ for i in range(vocab_size):
|
|||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
||||
elif reverse_vocab[i] in added_vocab:
|
||||
tokens.append(reverse_vocab[i])
|
||||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
||||
if tokenizer.added_tokens_decoder[i].special:
|
||||
toktypes.append(gguf.TokenType.CONTROL)
|
||||
else:
|
||||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
||||
else:
|
||||
tokens.append(reverse_vocab[i])
|
||||
toktypes.append(gguf.TokenType.NORMAL)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue