From 18504b6a3435688f2b6b9dbf76db7949f08e1ab4 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Mon, 11 Mar 2024 11:13:35 -0400
Subject: [PATCH] handle wide characters in llama_file examples

When _WIN32 is defined, the llama_file constructor no longer passes the
file name straight to std::fopen. It now:

  * treats fname as UTF-8 and converts it to a wide-character string with
    MultiByteToWideChar(CP_UTF8, ...), calling it once to get the required
    length (including the terminating NUL, because -1 is passed as the
    input length) and once more to fill the buffer;
  * widens the mode string one character at a time;
  * opens the file with _wfopen;
  * switches the C locale to the system default ("") around the open and
    restores the previous locale afterwards.

All other platforms keep calling std::fopen(fname, mode). The same change
is applied to each of the four copies of llama_file touched below.

---
 common/train.cpp                               | 21 +++++++++++++++++++
 .../convert-llama2c-to-ggml.cpp                | 21 +++++++++++++++++++
 examples/finetune/finetune.cpp                 | 21 +++++++++++++++++++
 llama.cpp                                      | 21 +++++++++++++++++++
 4 files changed, 84 insertions(+)

diff --git a/common/train.cpp b/common/train.cpp
index 0dbfd24df..c852131b8 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -716,7 +716,28 @@ struct llama_file {
     size_t size;
 
     llama_file(const char * fname, const char * mode) {
+#ifdef _WIN32
+        // temporarily change the locale to the system default to handle Unicode file names
+        std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+        std::setlocale(LC_ALL, "");
+
+        // convert multi-byte string to wide-char string
+        int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+        std::vector<wchar_t> wfname(wsize);
+        MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+        // determine the correct wide-character mode string
+        std::wstring wmode;
+        for(; *mode; ++mode) {
+            wmode += wchar_t(*mode);
+        }
+
+        fp = _wfopen(wfname.data(), wmode.c_str());
+
+        std::setlocale(LC_ALL, oldLocale.c_str());
+#else
         fp = std::fopen(fname, mode);
+#endif
         if (fp == NULL) {
             size = 0;
         } else {
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 8209dcb64..ebaebf970 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -464,7 +464,28 @@ struct llama_file {
     size_t size;
 
     llama_file(const char * fname, const char * mode) {
+#ifdef _WIN32
+        // temporarily change the locale to the system default to handle Unicode file names
+        std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+        std::setlocale(LC_ALL, "");
+
+        // convert multi-byte string to wide-char string
+        int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+        std::vector<wchar_t> wfname(wsize);
+        MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+        // determine the correct wide-character mode string
+        std::wstring wmode;
+        for(; *mode; ++mode) {
+            wmode += wchar_t(*mode);
+        }
+
+        fp = _wfopen(wfname.data(), wmode.c_str());
+
+        std::setlocale(LC_ALL, oldLocale.c_str());
+#else
         fp = std::fopen(fname, mode);
+#endif
         if (fp == NULL) {
             size = 0;
         } else {
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 3da5317b3..0a241781d 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -952,7 +952,28 @@ struct llama_file {
     size_t size;
 
     llama_file(const char * fname, const char * mode) {
+#ifdef _WIN32
+        // temporarily change the locale to the system default to handle Unicode file names
+        std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+        std::setlocale(LC_ALL, "");
+
+        // convert multi-byte string to wide-char string
+        int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+        std::vector<wchar_t> wfname(wsize);
+        MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+        // determine the correct wide-character mode string
+        std::wstring wmode;
+        for(; *mode; ++mode) {
+            wmode += wchar_t(*mode);
+        }
+
+        fp = _wfopen(wfname.data(), wmode.c_str());
+
+        std::setlocale(LC_ALL, oldLocale.c_str());
+#else
         fp = std::fopen(fname, mode);
+#endif
         if (fp == NULL) {
             size = 0;
         } else {
diff --git a/llama.cpp b/llama.cpp
index 4225f9555..a249c59b1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -988,7 +988,28 @@ struct llama_file {
     size_t size;
 
     llama_file(const char * fname, const char * mode) {
+#ifdef _WIN32
+        // temporarily change the locale to the system default to handle Unicode file names
+        std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+        std::setlocale(LC_ALL, "");
+
+        // convert multi-byte string to wide-char string
+        int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+        std::vector<wchar_t> wfname(wsize);
+        MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+        // determine the correct wide-character mode string
+        std::wstring wmode;
+        for(; *mode; ++mode) {
+            wmode += wchar_t(*mode);
+        }
+
+        fp = _wfopen(wfname.data(), wmode.c_str());
+
+        std::setlocale(LC_ALL, oldLocale.c_str());
+#else
         fp = std::fopen(fname, mode);
+#endif
         if (fp == NULL) {
             throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
         }