Resolved issues
This commit is contained in:
parent
54f93eb50b
commit
1c924e4b35
5 changed files with 18 additions and 18 deletions
17
unicode.h
17
unicode.h
|
@ -3,6 +3,8 @@
|
|||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
|
||||
#define CODEPOINT_TYPE_UNIDENTIFIED 0
|
||||
#define CODEPOINT_TYPE_DIGIT 1
|
||||
|
@ -30,5 +32,16 @@ std::vector<std::wstring> get_gpt2_regex();
|
|||
std::vector<std::wstring> get_deepseek_coder_regex();
|
||||
std::vector<std::wstring> get_deepseek_llm_regex();
|
||||
|
||||
inline std::wstring from_utf8(const std::string & s);
|
||||
inline std::string to_utf8(const std::wstring & ws);
|
||||
/**
 * Decode a UTF-8 byte string into a wide string.
 *
 * The conversion facet is selected by the width of wchar_t so that every
 * Unicode code point can be represented in the result:
 *   - 16-bit wchar_t: the result is UTF-16, so code points above U+FFFF
 *     are emitted as surrogate pairs (std::codecvt_utf8 alone would be
 *     limited to UCS-2 here).
 *   - wider wchar_t: the result holds one code point per element.
 *
 * @param s  UTF-8 encoded input.
 * @return   The decoded wide string.
 * @throws std::range_error if the conversion fails (raised by
 *         std::wstring_convert::from_bytes).
 *
 * NOTE: std::wstring_convert and the <codecvt> facets are deprecated since
 * C++17; they are kept because the file already depends on them.
 */
inline std::wstring from_utf8(const std::string & s)
{
    // sizeof(wchar_t) is a compile-time constant; both branches compile on
    // every platform and the unused one is trivially dead.
    if (sizeof(wchar_t) == 2) {
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utf16_conv;
        return utf16_conv.from_bytes(s);
    }

    std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
    return conv.from_bytes(s);
}
|
||||
|
||||
/**
 * Encode a wide string as UTF-8.
 *
 * The conversion facet is selected by the width of wchar_t so that the
 * input is interpreted in the encoding wide strings actually use:
 *   - 16-bit wchar_t: the input is treated as UTF-16 and surrogate pairs
 *     are combined into a single 4-byte UTF-8 sequence.
 *   - wider wchar_t: each element is treated as one code point. Using the
 *     UTF-16 facet here would misinterpret elements above U+FFFF.
 *
 * @param ws  Wide string to encode.
 * @return    The UTF-8 encoded bytes.
 * @throws std::range_error if the conversion fails (raised by
 *         std::wstring_convert::to_bytes).
 *
 * NOTE: std::wstring_convert and the <codecvt> facets are deprecated since
 * C++17; they are kept because the file already depends on them.
 */
inline std::string to_utf8(const std::wstring & ws)
{
    // sizeof(wchar_t) is a compile-time constant; both branches compile on
    // every platform and the unused one is trivially dead.
    if (sizeof(wchar_t) == 2) {
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf16_conv;
        return utf16_conv.to_bytes(ws);
    }

    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> conv;
    return conv.to_bytes(ws);
}
|
Loading…
Add table
Add a link
Reference in a new issue