From fa2cd7e7b906581cc76bb6e743624c51abe28dee Mon Sep 17 00:00:00 2001 From: Galunid Date: Tue, 24 Oct 2023 12:47:00 +0200 Subject: [PATCH] Add special token handling to convert script --- convert-stablelm-hf-to-gguf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/convert-stablelm-hf-to-gguf.py b/convert-stablelm-hf-to-gguf.py index bc3c84f2d..3bd8bdda0 100755 --- a/convert-stablelm-hf-to-gguf.py +++ b/convert-stablelm-hf-to-gguf.py @@ -120,7 +120,10 @@ for i in range(vocab_size): toktypes.append(gguf.TokenType.USER_DEFINED) elif reverse_vocab[i] in added_vocab: tokens.append(reverse_vocab[i]) - toktypes.append(gguf.TokenType.USER_DEFINED) + if tokenizer.added_tokens_decoder[i].special: + toktypes.append(gguf.TokenType.CONTROL) + else: + toktypes.append(gguf.TokenType.USER_DEFINED) else: tokens.append(reverse_vocab[i]) toktypes.append(gguf.TokenType.NORMAL)