Adding unicode regex function

This commit is contained in:
Kazim Abrar Mahi 2024-04-16 01:52:33 +06:00 committed by Georgi Gerganov
parent a5710a4101
commit 7e308ed212
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
4 changed files with 16 additions and 41 deletions

View file

@ -28,21 +28,5 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8);
// Takes one char32_t value and returns one char32_t value.
// NOTE(review): presumably maps a Unicode code point to its lower-case
// counterpart, judging by the name — the definition is not visible here; confirm.
char32_t unicode_tolower(char32_t cp);
// The three getters below take no arguments and each return a list of wide strings.
// NOTE(review): the names suggest one set of regex patterns per model family
// (GPT-2, DeepSeek Coder, DeepSeek LLM) — confirm against the definitions.
std::vector<std::wstring> get_gpt2_regex();
std::vector<std::wstring> get_deepseek_coder_regex();
std::vector<std::wstring> get_deepseek_llm_regex();
// Decodes a UTF-8 byte string into a wide string.
//
// The conversion facet is chosen from the width of wchar_t so that the result
// is always in the platform's native wide encoding:
//   - 32-bit wchar_t: one element per code point (UTF-32);
//   - 16-bit wchar_t: UTF-16, with surrogate pairs for code points above U+FFFF.
// Using std::codecvt_utf8<wchar_t> unconditionally would restrict 16-bit
// platforms to UCS-2, i.e. it could not decode characters outside the BMP.
//
// Conversion failures are reported by std::wstring_convert, which throws
// std::range_error when no error string was supplied (as is the case here).
inline std::wstring from_utf8(const std::string & s)
{
    if (sizeof(wchar_t) >= 4) {
        // wchar_t can hold any code point: decode straight to UTF-32.
        std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
        return conv.from_bytes(s);
    }
    // Narrow wchar_t: produce UTF-16 code units instead of UCS-2.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conv;
    return conv.from_bytes(s);
}
// Encodes a wide string as UTF-8.
//
// The conversion facet is chosen from the width of wchar_t so that the input
// is interpreted in the platform's native wide encoding:
//   - 32-bit wchar_t: every element is a full code point (UTF-32);
//   - 16-bit wchar_t: the input is UTF-16 and surrogate pairs are combined.
// std::codecvt_utf8_utf16<wchar_t> treats each element as a UTF-16 code unit
// even when wchar_t is 32 bits wide, so using it unconditionally mis-handles
// UTF-32 input containing code points above U+FFFF on some standard libraries.
//
// Conversion failures are reported by std::wstring_convert, which throws
// std::range_error when no error string was supplied (as is the case here).
inline std::string to_utf8(const std::wstring & ws)
{
    if (sizeof(wchar_t) >= 4) {
        // Elements are whole code points: encode UTF-32 -> UTF-8.
        std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
        return converter.to_bytes(ws);
    }
    // Elements are UTF-16 code units: encode UTF-16 -> UTF-8.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
    return converter.to_bytes(ws);
}
// Takes a regex given as a narrow string and returns a yes/no answer.
// NOTE(review): presumably reports whether a wide-regex counterpart is known
// for `regex` — the definition is not visible here; confirm.
bool unicode_wregex_exists(const std::string & regex);
// Takes a narrow `text` plus a list of wide-string regex expressions and
// returns a list of narrow strings.
// NOTE(review): presumably the pieces of `text` after splitting on/with the
// given expressions, with `text` expected to be UTF-8 — confirm against the definition.
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::wstring> & regex_exprs);