Update log to only print when input and output characters are different
parent d8d2f370dc
commit 92e41ec4b9

1 changed file with 3 additions and 1 deletion
@@ -529,8 +529,10 @@ class Model:
                 # used for `\n` / `\t` have been manually added in the added tokens
                 # To avoid unexpected issues - we make sure to encode single-char tokens
                 if len(token) == 1:
-                    logger.info("Ecode-Decode special characters using AutoTokenizer")
+                    previous_token = token
                     token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+                    if previous_token != token:
+                        logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
 
                 if tokenizer.added_tokens_decoder[i].special or self.does_token_look_special(token):
                     toktypes.append(gguf.TokenType.CONTROL)
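For reference, a minimal standalone sketch of the behavior this change produces: a single-character added token is round-tripped through the tokenizer's encode/decode, and a message is logged only when the decoded text differs from the input. The checkpoint name and sample tokens below are illustrative assumptions, not taken from this commit.

# Minimal sketch, assuming transformers is installed and the example
# checkpoint "gpt2" is reachable (both are assumptions for illustration).
import logging

from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumed example checkpoint

for token in ["\n", "\t", "A"]:  # assumed sample single-char tokens
    if len(token) == 1:
        previous_token = token
        # Round-trip the single character through encode/decode.
        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
        # Log only when the round trip changed the character, mirroring the diff above.
        if previous_token != token:
            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")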