diff --git a/unicode.cpp b/unicode.cpp index 0394f485f..3214387e5 100644 --- a/unicode.cpp +++ b/unicode.cpp @@ -357,6 +357,7 @@ static std::vector unicode_gpt2_regex_preprocess(const std::wstring & wt token += utf_char; } } + start += offset; } return bpe_offsets;