Update bruteforce test: fix binary search
parent 2ca313830e
commit 80f41234e4
1 changed file with 8 additions and 6 deletions
@@ -513,14 +513,16 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
             a, b = 0, len(text)
             step = b
             while step > 1:
-                step = step // 2
-                if not _compare(text[a : b - step])[0]:
-                    b = b - step
+                step = (step + 1) // 2
+                t = max(a, b - step)
+                if not _compare(text[a : t])[0]:
+                    b = t
             step = b
             while step > 1:
-                step = step // 2
-                if not _compare(text[a + step : b])[0]:
-                    a = a + step
+                step = (step + 1) // 2
+                t = min(a + step, b)
+                if not _compare(text[t : b])[0]:
+                    a = t
             ok, ids1, ids2, text1, text2 = _compare(text[a : b])
             assert a <= b and not ok
             # show unique failing texts differences
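To make the intent of the change easier to follow, here is a minimal standalone sketch of the same shrinking scheme as the fixed test. `shrink_failing_range` and the `fails` predicate are hypothetical stand-ins for the test's `_compare` helper; they are not part of the commit.

# Minimal sketch (not from the commit): same shrinking scheme as the fixed test,
# with a hypothetical fails(s) predicate playing the role of `not _compare(s)[0]`.
def shrink_failing_range(text: str, fails) -> tuple[int, int]:
    assert fails(text)
    a, b = 0, len(text)
    # Shrink from the right: try cutting `step` characters off the end.
    step = b
    while step > 1:
        step = (step + 1) // 2   # ceiling halving, so the step can always reach 1
        t = max(a, b - step)     # clamp: the right edge never crosses `a`
        if fails(text[a:t]):
            b = t
    # Shrink from the left: try cutting `step` characters off the front.
    step = b
    while step > 1:
        step = (step + 1) // 2
        t = min(a + step, b)     # clamp: the left edge never crosses `b`
        if fails(text[t:b]):
            a = t
    assert a <= b and fails(text[a:b])
    return a, b


if __name__ == "__main__":
    # Toy usage: the "failure" is triggered by the substring "XY";
    # the search narrows the 14-character input down to exactly "XY".
    sample = "aaaaaaXYbbbbbb"
    lo, hi = shrink_failing_range(sample, lambda s: "XY" in s)
    print(sample[lo:hi])  # XY

The two details the commit fixes are visible in the sketch: ceiling halving `(step + 1) // 2` lets the trial steps cover the whole remaining distance (7, 4, 2, 1 for a range of 14, versus 7, 3, 1 with the old `step // 2`, which could leave the window larger than minimal), and clamping with `max(a, b - step)` / `min(a + step, b)` keeps the moving edge from crossing the opposite end, so `a <= b` is preserved for the final assert.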