unicode : clean-up
This commit is contained in:
parent
d63cc9068b
commit
e972e6cbf8
3 changed files with 53 additions and 62 deletions
|
@ -1,4 +1,4 @@
|
|||
#include "unicode-data.h"
|
||||
#include "unicode-data.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
|
@ -1649,7 +1649,3 @@ const std::map<char32_t, char32_t> unicode_map_lowercase = {
|
|||
{0x1E917, 0x1E939}, {0x1E918, 0x1E93A}, {0x1E919, 0x1E93B}, {0x1E91A, 0x1E93C}, {0x1E91B, 0x1E93D}, {0x1E91C, 0x1E93E},
|
||||
{0x1E91D, 0x1E93F}, {0x1E91E, 0x1E940}, {0x1E91F, 0x1E941}, {0x1E920, 0x1E942}, {0x1E921, 0x1E943},
|
||||
};
|
||||
|
||||
const std::set<std::string> unicode_regex_with_custom_preprocessor = {
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)"
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue