From 37cf135cb012a5f122dcc37aee5ddd4a57389a5c Mon Sep 17 00:00:00 2001 From: goerch Date: Mon, 18 Sep 2023 21:15:01 +0200 Subject: [PATCH] Fix MSVC Unicode BOM problem --- tests/CMakeLists.txt | 2 -- unicode.h | 14 +++++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c78b96842..227dd6659 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,6 @@ function(llama_build_executable source) get_filename_component(TEST_TARGET ${source} NAME_WE) add_executable(${TEST_TARGET} ${source}) - target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE) install(TARGETS ${TEST_TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE llama common) endfunction() @@ -14,7 +13,6 @@ endfunction() function(llama_build_and_test_executable source) get_filename_component(TEST_TARGET ${source} NAME_WE) add_executable(${TEST_TARGET} ${source}) - target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE) install(TARGETS ${TEST_TARGET} RUNTIME) target_link_libraries(${TEST_TARGET} PRIVATE llama common) add_test(NAME ${TEST_TARGET} COMMAND $ ${ARGN}) diff --git a/unicode.h b/unicode.h index 33488a690..99bf8137c 100644 --- a/unicode.h +++ b/unicode.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include #include @@ -400,13 +400,13 @@ static int codepoint_type(std::string utf8) { static std::string bytes_to_unicode_bpe(uint8_t byte) { static std::unordered_map map; - for (int ch = U'!'; ch <= U'~'; ++ch) { + for (int ch = u'!'; ch <= u'~'; ++ch) { map[ch] = codepoint_to_utf8(ch); } - for (int ch = U'¡'; ch <= U'¬'; ++ch) { + for (int ch = u'¡'; ch <= u'¬'; ++ch) { map[ch] = codepoint_to_utf8(ch); } - for (int ch = U'®'; ch <= U'ÿ'; ++ch) { + for (int ch = u'®'; ch <= u'ÿ'; ++ch) { map[ch] = codepoint_to_utf8(ch); } auto n = 0; @@ -422,13 +422,13 @@ static std::string bytes_to_unicode_bpe(uint8_t byte) { static uint8_t unicode_to_bytes_bpe(const std::string& utf8) { static std::unordered_map map; if (map.size() == 0) { - for (int ch = U'!'; ch <= U'~'; ++ch) { + for (int ch = u'!'; ch <= u'~'; ++ch) { map[codepoint_to_utf8(ch)] = ch; } - for (int ch = U'¡'; ch <= U'¬'; ++ch) { + for (int ch = u'¡'; ch <= u'¬'; ++ch) { map[codepoint_to_utf8(ch)] = ch; } - for (int ch = U'®'; ch <= U'ÿ'; ++ch) { + for (int ch = u'®'; ch <= u'ÿ'; ++ch) { map[codepoint_to_utf8(ch)] = ch; } auto n = 0;