From 36d983262e4558c670fa94d42641abda37bf3b1c Mon Sep 17 00:00:00 2001 From: Kazim Abrar Mahi Date: Wed, 17 Apr 2024 07:40:40 +0600 Subject: [PATCH] Fixed issue with gpt2 regex custom preprocessor --- unicode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/unicode.cpp b/unicode.cpp index 0394f485f..3214387e5 100644 --- a/unicode.cpp +++ b/unicode.cpp @@ -357,6 +357,7 @@ static std::vector unicode_gpt2_regex_preprocess(const std::wstring & wt token += utf_char; } } + start += offset; } return bpe_offsets;