Added and refactored unicode_regex_split and related functions.
This commit is contained in:
parent
1c924e4b35
commit
4056dc5b1e
3 changed files with 264 additions and 103 deletions
|
@ -1,10 +1,10 @@
|
|||
#pragma once
|
||||
|
||||
#include <codecvt>
|
||||
#include <cstdint>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
|
||||
#define CODEPOINT_TYPE_UNIDENTIFIED 0
|
||||
#define CODEPOINT_TYPE_DIGIT 1
|
||||
|
@ -44,4 +44,5 @@ inline std::string to_utf8(const std::wstring & ws)
|
|||
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
|
||||
std::string utf8 = converter.to_bytes(ws);
|
||||
return utf8;
|
||||
}
|
||||
}
|
||||
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::wstring> & regex_exprs);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue