fix: add real values

This commit is contained in:
Joan Martinez 2024-05-07 14:44:09 +02:00
parent 88e943f921
commit d6edc627db
3 changed files with 7 additions and 34 deletions

File diff suppressed because one or more lines are too long

View file

@@ -13,6 +13,7 @@
#include <vector>
#include <locale>
#include <codecvt>
#include <algorithm>
static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
std::string result;
@@ -470,9 +471,7 @@ std::string unicode_cpt_to_utf8(uint32_t cp) {
}
// Function to sort subsequences based on canonical class
std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts) {
std::vector<uint32_t> subsequence;
std::vector<uint32_t> result;
std::vector<uint32_t> sort_by_canonical_class(std::vector<uint32_t> & cpts) {
auto compareByCanonicalClass = [&](const uint32_t& a, const uint32_t& b) {
auto cc_a_it = unicode_canonical_class.find(a);
if (cc_a_it != unicode_canonical_class.end()) {
@@ -485,33 +484,9 @@ std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts
return false;
};
for (const auto& cpt : cpts) {
auto it = unicode_canonical_class.find(cpt);
if (it != unicode_canonical_class.end()) {
if (it->second > 0) {
subsequence.push_back(cpt);
} else {
if (!subsequence.empty()) {
sort(subsequence.begin(), subsequence.end(), compareByCanonicalClass);
for (const auto& codepoint : subsequence) {
result.push_back(codepoint);
}
subsequence.clear();
}
result.push_back(cpt);
}
}
}
if (!subsequence.empty()) {
sort(subsequence.begin(), subsequence.end(), compareByCanonicalClass);
for (const auto& codepoint : subsequence) {
result.push_back(codepoint);
}
}
return result;
// Sort the sequence using the custom comparator function
sort(cpts.begin(), cpts.end(), compareByCanonicalClass);
return cpts;
}
std::vector<uint32_t> canonical_decomposition_cpts(std::vector<uint32_t> & cpts, const std::vector<uint32_t>::iterator& cpt_begin, const std::vector<uint32_t>::iterator& cpt_end) {

View file

@@ -18,6 +18,7 @@ std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
std::vector<uint32_t> unicode_cpts_normalize_nfd(std::vector<uint32_t> & cpts);
std::vector<uint32_t> canonical_decomposition_cpts(std::vector<uint32_t> & cpts, const std::vector<uint32_t>::iterator& cpt_begin, const std::vector<uint32_t>::iterator& cpt_end);
std::vector<uint32_t> sort_by_canonical_class(const std::vector<uint32_t> & cpts);
int unicode_cpt_type(uint32_t cp);
int unicode_cpt_type(const std::string & utf8);