unicode : straighten tables

This commit is contained in:
Georgi Gerganov 2024-03-11 11:53:17 +02:00
parent de0929ae7d
commit e607540ec9
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 1386 additions and 507 deletions

File diff suppressed because it is too large Load diff

View file

@@ -5,13 +5,13 @@
#include <vector> #include <vector>
#define CODEPOINT_TYPE_UNIDENTIFIED 0 #define CODEPOINT_TYPE_UNIDENTIFIED 0
#define CODEPOINT_TYPE_DIGIT 1 #define CODEPOINT_TYPE_DIGIT 1
#define CODEPOINT_TYPE_LETTER 2 #define CODEPOINT_TYPE_LETTER 2
#define CODEPOINT_TYPE_WHITESPACE 3 #define CODEPOINT_TYPE_WHITESPACE 3
#define CODEPOINT_TYPE_ACCENT_MARK 4 #define CODEPOINT_TYPE_ACCENT_MARK 4
#define CODEPOINT_TYPE_PUNCTUATION 5 #define CODEPOINT_TYPE_PUNCTUATION 5
#define CODEPOINT_TYPE_SYMBOL 6 #define CODEPOINT_TYPE_SYMBOL 6
#define CODEPOINT_TYPE_CONTROL 7 #define CODEPOINT_TYPE_CONTROL 7
// TODO: remove // TODO: remove
const std::multimap<uint32_t, uint32_t> & unicode_nfd_map(); const std::multimap<uint32_t, uint32_t> & unicode_nfd_map();