fix: add real values
This commit is contained in:
parent
88e943f921
commit
d6edc627db
3 changed files with 7 additions and 34 deletions
File diff suppressed because one or more lines are too long
35
unicode.cpp
35
unicode.cpp
|
@ -13,6 +13,7 @@
|
|||
#include <vector>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
#include <algorithm>
|
||||
|
||||
static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
|
||||
std::string result;
|
||||
|
@ -470,9 +471,7 @@ std::string unicode_cpt_to_utf8(uint32_t cp) {
|
|||
}
|
||||
|
||||
// Function to sort subsequences based on canonical class
|
||||
std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts) {
|
||||
std::vector<uint32_t> subsequence;
|
||||
std::vector<uint32_t> result;
|
||||
std::vector<uint32_t> sort_by_canonical_class(std::vector<uint32_t> & cpts) {
|
||||
auto compareByCanonicalClass = [&](const uint32_t& a, const uint32_t& b) {
|
||||
auto cc_a_it = unicode_canonical_class.find(a);
|
||||
if (cc_a_it != unicode_canonical_class.end()) {
|
||||
|
@ -485,33 +484,9 @@ std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts
|
|||
return false;
|
||||
};
|
||||
|
||||
for (const auto& cpt : cpts) {
|
||||
auto it = unicode_canonical_class.find(cpt);
|
||||
if (it != unicode_canonical_class.end()) {
|
||||
if (it->second > 0) {
|
||||
subsequence.push_back(cpt);
|
||||
} else {
|
||||
if (!subsequence.empty()) {
|
||||
sort(subsequence.begin(), subsequence.end(), compareByCanonicalClass);
|
||||
for (const auto& codepoint : subsequence) {
|
||||
result.push_back(codepoint);
|
||||
}
|
||||
subsequence.clear();
|
||||
}
|
||||
|
||||
result.push_back(cpt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!subsequence.empty()) {
|
||||
sort(subsequence.begin(), subsequence.end(), compareByCanonicalClass);
|
||||
for (const auto& codepoint : subsequence) {
|
||||
result.push_back(codepoint);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
// Sort the sequence using the custom comparator function
|
||||
sort(cpts.begin(), cpts.end(), compareByCanonicalClass);
|
||||
return cpts;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> canonical_decomposition_cpts(std::vector<uint32_t> & cpts, const std::vector<uint32_t>::iterator& cpt_begin, const std::vector<uint32_t>::iterator& cpt_end) {
|
||||
|
|
|
@ -18,6 +18,7 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
|||
|
||||
std::vector<uint32_t> unicode_cpts_normalize_nfd(std::vector<uint32_t> & cpts);
|
||||
std::vector<uint32_t> canonical_decomposition_cpts(std::vector<uint32_t> & cpts, const std::vector<uint32_t>::iterator& cpt_begin, const std::vector<uint32_t>::iterator& cpt_end);
|
||||
// Returns the code points ordered by canonical class.
// NOTE(review): this prototype takes a const reference, but the definition shown
// for unicode.cpp in this change takes a non-const `std::vector<uint32_t> &`,
// sorts it in place and returns it. Those are two distinct overloads — confirm
// that the declaration and the definition are meant to match.
std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts);
|
||||
|
||||
int unicode_cpt_type(uint32_t cp);
|
||||
int unicode_cpt_type(const std::string & utf8);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue