wpm : portable unicode tolower (#6305)

Also use C locale for ispunct/isspace, and split unicode-data.cpp from unicode.cpp.
This commit is contained in:
Jared Van Bortel 2024-03-26 17:46:21 -04:00 committed by GitHub
parent 557410b8f0
commit 32c8486e1f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 1699 additions and 1425 deletions

16
unicode-data.h Normal file
View file

@ -0,0 +1,16 @@
#pragma once
#include <cstdint>
#include <map>
#include <utility>
#include <vector>
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_digit;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_letter;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_whitespace;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_accent_mark;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_punctuation;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;
extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;
extern const std::map<char32_t, char32_t> unicode_map_lowercase;