Test l/r-strip for more than 4 spaces

This commit is contained in:
jaime-m-p 2024-07-09 00:59:29 +02:00
parent e8b3955346
commit 9307c3fd46

View file

@@ -273,7 +273,7 @@ def generator_apostrophe() -> Iterator[str]:
def generator_added_lr_strip(tokenizer: TokenizerGroundtruth) -> Iterator[str]:
WHITESPACES = ["", " ", " ", "\n", "\r\n", "\n\n", "\t", "\t\t"]
WHITESPACES = ["", " ", " ", "\n", "\r\n", "\n\n", "\t", "\t\t", " "]
all_tokens = list(sorted(set(tokenizer.special_tokens + tokenizer.added_tokens)))
for token in all_tokens:
for lstrip in WHITESPACES: