Resolved issues

This commit is contained in:
Kazim Abrar Mahi 2024-03-23 14:38:06 +06:00 committed by Georgi Gerganov
parent 54f93eb50b
commit 1c924e4b35
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 18 additions and 18 deletions

View file

@@ -3,6 +3,8 @@
#include <codecvt>
#include <cstdint>
#include <locale>
#include <string>
#include <type_traits>
#include <vector>
// Integer tags identifying codepoint categories. Only the first two values are
// visible in this excerpt; further CODEPOINT_TYPE_* tags may follow elsewhere.
#define CODEPOINT_TYPE_UNIDENTIFIED 0
#define CODEPOINT_TYPE_DIGIT 1
@@ -30,5 +32,16 @@ std::vector<std::wstring> get_gpt2_regex();
// Declarations only; the definitions are not part of this excerpt.
// Each function returns a list of wide strings -- presumably the regex
// patterns its name refers to (TODO: confirm against the definitions).
std::vector<std::wstring> get_deepseek_coder_regex();
std::vector<std::wstring> get_deepseek_llm_regex();
// Forward declarations of the UTF-8 <-> wide-string conversion helpers.
// Both are defined inline immediately below.
inline std::wstring from_utf8(const std::string & s);
inline std::string to_utf8(const std::wstring & ws);
/// Decode a UTF-8 byte string into a wide string.
///
/// The conversion facet is chosen from the width of wchar_t so the result is in
/// the platform's native wide encoding:
///   - 16-bit wchar_t: UTF-16, code points above U+FFFF become surrogate pairs
///     (std::codecvt_utf8<wchar_t> would only handle UCS-2 here and reject them);
///   - otherwise: one wchar_t per code point (UTF-32 / UCS-4).
/// This is the same selection to_utf8 needs in order to round-trip.
///
/// @param s  UTF-8 encoded input.
/// @return   The decoded wide string.
/// @throws std::range_error if the input cannot be converted (the converter is
///         constructed without an error-fallback string).
inline std::wstring from_utf8(const std::string & s)
{
    using utf8_facet = std::conditional<sizeof(wchar_t) == 2,
                                        std::codecvt_utf8_utf16<wchar_t>,
                                        std::codecvt_utf8<wchar_t>>::type;
    std::wstring_convert<utf8_facet, wchar_t> conv;
    return conv.from_bytes(s);
}
/// Encode a wide string as UTF-8.
///
/// The conversion facet is chosen from the width of wchar_t so the input is
/// interpreted in the platform's native wide encoding:
///   - 16-bit wchar_t: UTF-16, surrogate pairs are combined into one code point;
///   - otherwise: each wchar_t is a full code point (UTF-32 / UCS-4).
/// Using std::codecvt_utf8_utf16 unconditionally would treat 32-bit elements as
/// UTF-16 code units, which is outside that facet's contract for values above
/// U+FFFF and gives implementation-dependent results.
///
/// @param ws  Wide string to encode.
/// @return    The UTF-8 encoded bytes.
/// @throws std::range_error if the input cannot be converted (the converter is
///         constructed without an error-fallback string).
inline std::string to_utf8(const std::wstring & ws)
{
    using utf8_facet = std::conditional<sizeof(wchar_t) == 2,
                                        std::codecvt_utf8_utf16<wchar_t>,
                                        std::codecvt_utf8<wchar_t>>::type;
    std::wstring_convert<utf8_facet, wchar_t> converter;
    return converter.to_bytes(ws);
}