From 37cf135cb012a5f122dcc37aee5ddd4a57389a5c Mon Sep 17 00:00:00 2001
From: goerch <jhr.walter@t-online.de>
Date: Mon, 18 Sep 2023 21:15:01 +0200
Subject: [PATCH] Fix MSVC Unicode BOM problem

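---
Notes (not part of the commit message):
  * tests/CMakeLists.txt: stop defining UNICODE/_UNICODE for the test targets.
  * unicode.h: prepend a UTF-8 byte-order mark to the file, and write the
    range bounds in bytes_to_unicode_bpe()/unicode_to_bytes_bpe() as u'...'
    (char16_t) literals instead of U'...' (char32_t) literals.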
 tests/CMakeLists.txt |  2 --
 unicode.h            | 14 +++++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c78b96842..227dd6659 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,7 +1,6 @@
 function(llama_build_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
-    target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
     install(TARGETS ${TEST_TARGET} RUNTIME)
     target_link_libraries(${TEST_TARGET} PRIVATE llama common)
 endfunction()
@@ -14,7 +13,6 @@ endfunction()
 function(llama_build_and_test_executable source)
     get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source})
-    target_compile_definitions(${TEST_TARGET} PUBLIC UNICODE _UNICODE)
     install(TARGETS ${TEST_TARGET} RUNTIME)
     target_link_libraries(${TEST_TARGET} PRIVATE llama common)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
diff --git a/unicode.h b/unicode.h
index 33488a690..99bf8137c 100644
--- a/unicode.h
+++ b/unicode.h
@@ -1,4 +1,4 @@
-#pragma once
+﻿#pragma once
 
 #include <cassert>
 #include <stdexcept>
@@ -400,13 +400,13 @@ static int codepoint_type(std::string utf8) {
 
 static std::string bytes_to_unicode_bpe(uint8_t byte) {
     static std::unordered_map<uint8_t, std::string> map;
-    for (int ch = U'!'; ch <= U'~'; ++ch) {
+    for (int ch = u'!'; ch <= u'~'; ++ch) {
         map[ch] = codepoint_to_utf8(ch);
     }
-    for (int ch = U'¡'; ch <= U'¬'; ++ch) {
+    for (int ch = u'¡'; ch <= u'¬'; ++ch) {
         map[ch] = codepoint_to_utf8(ch);
     }
-    for (int ch = U'®'; ch <= U'ÿ'; ++ch) {
+    for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
         map[ch] = codepoint_to_utf8(ch);
     }
     auto n = 0;
@@ -422,13 +422,13 @@ static std::string bytes_to_unicode_bpe(uint8_t byte) {
 static uint8_t unicode_to_bytes_bpe(const std::string& utf8) {
     static std::unordered_map<std::string, uint8_t> map;
     if (map.size() == 0) {
-        for (int ch = U'!'; ch <= U'~'; ++ch) {
+        for (int ch = u'!'; ch <= u'~'; ++ch) {
             map[codepoint_to_utf8(ch)] = ch;
         }
-        for (int ch = U'¡'; ch <= U'¬'; ++ch) {
+        for (int ch = u'¡'; ch <= u'¬'; ++ch) {
             map[codepoint_to_utf8(ch)] = ch;
         }
-        for (int ch = U'®'; ch <= U'ÿ'; ++ch) {
+        for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
             map[codepoint_to_utf8(ch)] = ch;
         }
         auto n = 0;