Fix MSVC Unicode BOM problem

This commit is contained in:
goerch 2023-09-18 21:15:01 +02:00
parent 77704232b2
commit 37cf135cb0
2 changed files with 7 additions and 9 deletions

View file

@@ -1,7 +1,6 @@
function(llama_build_executable source) function(llama_build_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE) get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source}) add_executable(${TEST_TARGET} ${source})
target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
install(TARGETS ${TEST_TARGET} RUNTIME) install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE llama common) target_link_libraries(${TEST_TARGET} PRIVATE llama common)
endfunction() endfunction()
@@ -14,7 +13,6 @@ endfunction()
function(llama_build_and_test_executable source) function(llama_build_and_test_executable source)
get_filename_component(TEST_TARGET ${source} NAME_WE) get_filename_component(TEST_TARGET ${source} NAME_WE)
add_executable(${TEST_TARGET} ${source}) add_executable(${TEST_TARGET} ${source})
target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
install(TARGETS ${TEST_TARGET} RUNTIME) install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE llama common) target_link_libraries(${TEST_TARGET} PRIVATE llama common)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})

View file

@@ -1,4 +1,4 @@
#pragma once #pragma once
#include <cassert> #include <cassert>
#include <stdexcept> #include <stdexcept>
@@ -400,13 +400,13 @@ static int codepoint_type(std::string utf8) {
static std::string bytes_to_unicode_bpe(uint8_t byte) { static std::string bytes_to_unicode_bpe(uint8_t byte) {
static std::unordered_map<uint8_t, std::string> map; static std::unordered_map<uint8_t, std::string> map;
for (int ch = U'!'; ch <= U'~'; ++ch) { for (int ch = u'!'; ch <= u'~'; ++ch) {
map[ch] = codepoint_to_utf8(ch); map[ch] = codepoint_to_utf8(ch);
} }
for (int ch = U'¡'; ch <= U'¬'; ++ch) { for (int ch = u'¡'; ch <= u'¬'; ++ch) {
map[ch] = codepoint_to_utf8(ch); map[ch] = codepoint_to_utf8(ch);
} }
for (int ch = U'®'; ch <= U'ÿ'; ++ch) { for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
map[ch] = codepoint_to_utf8(ch); map[ch] = codepoint_to_utf8(ch);
} }
auto n = 0; auto n = 0;
@@ -422,13 +422,13 @@ static std::string bytes_to_unicode_bpe(uint8_t byte) {
static uint8_t unicode_to_bytes_bpe(const std::string& utf8) { static uint8_t unicode_to_bytes_bpe(const std::string& utf8) {
static std::unordered_map<std::string, uint8_t> map; static std::unordered_map<std::string, uint8_t> map;
if (map.size() == 0) { if (map.size() == 0) {
for (int ch = U'!'; ch <= U'~'; ++ch) { for (int ch = u'!'; ch <= u'~'; ++ch) {
map[codepoint_to_utf8(ch)] = ch; map[codepoint_to_utf8(ch)] = ch;
} }
for (int ch = U'¡'; ch <= U'¬'; ++ch) { for (int ch = u'¡'; ch <= u'¬'; ++ch) {
map[codepoint_to_utf8(ch)] = ch; map[codepoint_to_utf8(ch)] = ch;
} }
for (int ch = U'®'; ch <= U'ÿ'; ++ch) { for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
map[codepoint_to_utf8(ch)] = ch; map[codepoint_to_utf8(ch)] = ch;
} }
auto n = 0; auto n = 0;