Add comment explaining the logic behind the if statement

This commit is contained in:
Billel Mokeddem 2024-12-18 05:46:07 +00:00
parent fc055407b7
commit b3d022aa1a

View file

@ -527,6 +527,7 @@ class Model:
if token in added_vocab:
# We need to manually encode and decode the added tokens in case special characters
# used for `\n` / `\t` have been manually added in the added tokens
# To avoid unexpected issues, we only encode and decode (round-trip) single-char tokens
if len(token) == 1:
token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))