From 9307c3fd46470658d8432b8de193c66738c92fd3 Mon Sep 17 00:00:00 2001 From: jaime-m-p <> Date: Tue, 9 Jul 2024 00:59:29 +0200 Subject: [PATCH] Test l/r-strip for more than 4 spaces --- tests/test-tokenizer-random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-tokenizer-random.py b/tests/test-tokenizer-random.py index 11baf6989..5b31cfc9c 100644 --- a/tests/test-tokenizer-random.py +++ b/tests/test-tokenizer-random.py @@ -273,7 +273,7 @@ def generator_apostrophe() -> Iterator[str]: def generator_added_lr_strip(tokenizer: TokenizerGroundtruth) -> Iterator[str]: - WHITESPACES = ["", " ", " ", "\n", "\r\n", "\n\n", "\t", "\t\t"] + WHITESPACES = ["", " ", " ", "\n", "\r\n", "\n\n", "\t", "\t\t", " "] all_tokens = list(sorted(set(tokenizer.special_tokens + tokenizer.added_tokens))) for token in all_tokens: for lstrip in WHITESPACES: