Fix MSVC Unicode BOM problem
This commit is contained in:
parent
77704232b2
commit
37cf135cb0
2 changed files with 7 additions and 9 deletions
|
@ -1,7 +1,6 @@
|
||||||
function(llama_build_executable source)
|
function(llama_build_executable source)
|
||||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||||
add_executable(${TEST_TARGET} ${source})
|
add_executable(${TEST_TARGET} ${source})
|
||||||
target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
|
|
||||||
install(TARGETS ${TEST_TARGET} RUNTIME)
|
install(TARGETS ${TEST_TARGET} RUNTIME)
|
||||||
target_link_libraries(${TEST_TARGET} PRIVATE llama common)
|
target_link_libraries(${TEST_TARGET} PRIVATE llama common)
|
||||||
endfunction()
|
endfunction()
|
||||||
|
@ -14,7 +13,6 @@ endfunction()
|
||||||
function(llama_build_and_test_executable source)
|
function(llama_build_and_test_executable source)
|
||||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||||
add_executable(${TEST_TARGET} ${source})
|
add_executable(${TEST_TARGET} ${source})
|
||||||
target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
|
|
||||||
install(TARGETS ${TEST_TARGET} RUNTIME)
|
install(TARGETS ${TEST_TARGET} RUNTIME)
|
||||||
target_link_libraries(${TEST_TARGET} PRIVATE llama common)
|
target_link_libraries(${TEST_TARGET} PRIVATE llama common)
|
||||||
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
||||||
|
|
14
unicode.h
14
unicode.h
|
@ -1,4 +1,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
@ -400,13 +400,13 @@ static int codepoint_type(std::string utf8) {
|
||||||
|
|
||||||
static std::string bytes_to_unicode_bpe(uint8_t byte) {
|
static std::string bytes_to_unicode_bpe(uint8_t byte) {
|
||||||
static std::unordered_map<uint8_t, std::string> map;
|
static std::unordered_map<uint8_t, std::string> map;
|
||||||
for (int ch = U'!'; ch <= U'~'; ++ch) {
|
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||||
map[ch] = codepoint_to_utf8(ch);
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
}
|
}
|
||||||
for (int ch = U'¡'; ch <= U'¬'; ++ch) {
|
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||||
map[ch] = codepoint_to_utf8(ch);
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
}
|
}
|
||||||
for (int ch = U'®'; ch <= U'ÿ'; ++ch) {
|
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||||
map[ch] = codepoint_to_utf8(ch);
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
}
|
}
|
||||||
auto n = 0;
|
auto n = 0;
|
||||||
|
@ -422,13 +422,13 @@ static std::string bytes_to_unicode_bpe(uint8_t byte) {
|
||||||
static uint8_t unicode_to_bytes_bpe(const std::string& utf8) {
|
static uint8_t unicode_to_bytes_bpe(const std::string& utf8) {
|
||||||
static std::unordered_map<std::string, uint8_t> map;
|
static std::unordered_map<std::string, uint8_t> map;
|
||||||
if (map.size() == 0) {
|
if (map.size() == 0) {
|
||||||
for (int ch = U'!'; ch <= U'~'; ++ch) {
|
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||||
map[codepoint_to_utf8(ch)] = ch;
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
}
|
}
|
||||||
for (int ch = U'¡'; ch <= U'¬'; ++ch) {
|
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||||
map[codepoint_to_utf8(ch)] = ch;
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
}
|
}
|
||||||
for (int ch = U'®'; ch <= U'ÿ'; ++ch) {
|
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||||
map[codepoint_to_utf8(ch)] = ch;
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
}
|
}
|
||||||
auto n = 0;
|
auto n = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue