Update bruteforce test: fix binary search
This commit is contained in:
parent
2ca313830e
commit
80f41234e4
1 changed file with 8 additions and 6 deletions
|
@@ -513,14 +513,16 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
|
|||
a, b = 0, len(text)
|
||||
step = b
|
||||
while step > 1:
|
||||
step = step // 2
|
||||
if not _compare(text[a : b - step])[0]:
|
||||
b = b - step
|
||||
step = (step + 1) // 2
|
||||
t = max(a, b - step)
|
||||
if not _compare(text[a : t])[0]:
|
||||
b = t
|
||||
step = b
|
||||
while step > 1:
|
||||
step = step // 2
|
||||
if not _compare(text[a + step : b])[0]:
|
||||
a = a + step
|
||||
step = (step + 1) // 2
|
||||
t = min(a + step, b)
|
||||
if not _compare(text[t : b])[0]:
|
||||
a = t
|
||||
ok, ids1, ids2, text1, text2 = _compare(text[a : b])
|
||||
assert a <= b and not ok
|
||||
# show unique failing texts differences
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue