From a8a922ca18253bb462fb13fec8f1d69cd1265430 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Sun, 10 Mar 2024 22:17:00 -0400
Subject: [PATCH] move repeated llama_file logic to llama.cpp

- allow for opening unicode file names on windows
---
 common/train.cpp                      |  84 ----------
 .../convert-llama2c-to-ggml.cpp       |  73 --------
 examples/export-lora/export-lora.cpp  |  88 +---------
 examples/finetune/finetune.cpp        |  83 ---------
 llama.cpp                             | 158 +++++++++++-------
 llama.h                               |  27 +++
 6 files changed, 122 insertions(+), 391 deletions(-)

diff --git a/common/train.cpp b/common/train.cpp
index 0dbfd24df..e5572a081 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -709,90 +709,6 @@ void save_train_state_gguf(struct gguf_context * fctx, struct train_state * trai
     save_opt_context_gguf(fctx, train->opt);
 }
 
-
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static size_t utf8_len(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(src) >> 4;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 8209dcb64..7fe6e0c2f 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -458,79 +458,6 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("fread failed: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-    std::float_t read_f32() {
-        std::float_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static bool is_ggml_file(const char * filename) {
     llama_file file(filename, "rb");
     if (file.size < 4) {
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 08413f57e..82476cf37 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
+#include "llama.h"
 
 #include 
 #include 
@@ -28,93 +29,6 @@ struct lora_data {
     uint32_t lora_alpha;
 };
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    bool eof() {
-        return tell() >= size;
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static struct export_lora_params get_default_export_lora_params() {
     struct export_lora_params result;
     result.fn_model_base = "";
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 3da5317b3..91d552a63 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -946,89 +946,6 @@ static void save_checkpoint_lora_file(const char * filename, struct my_llama_mod
     gguf_free(fctx);
 }
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor, const char * name) {
     if (tensor == NULL) {
         file->write_u32(0);
diff --git a/llama.cpp b/llama.cpp
index 4225f9555..1dc0fa187 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -65,8 +65,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -982,81 +980,113 @@ struct no_init {
     no_init() { /* do nothing */ }
 };
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
-        }
-        seek(0, SEEK_END);
-        size = tell();
-        seek(0, SEEK_SET);
-    }
-
-    size_t tell() const {
+llama_file::llama_file(const char * fname, const char * mode) {
 #ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
+    // temporarily change the locale to the system default to handle Unicode file names
+    std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+    std::setlocale(LC_ALL, "");
+
+    // convert multi-byte string to wide-char string
+    int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+    std::vector<wchar_t> wfname(wsize);
+    MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+    // determine the correct wide-character mode string
+    std::wstring wmode;
+    for(; *mode; ++mode) {
+        wmode += wchar_t(*mode);
     }
 
-    void seek(size_t offset, int whence) const {
+    fp = _wfopen(wfname.data(), wmode.c_str());
+
+    std::setlocale(LC_ALL, oldLocale.c_str());
+#else
+    fp = fopen(fname, mode);
+#endif
+    if (fp == NULL) {
+        throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+    }
+    seek(0, SEEK_END);
+    size = tell();
+    seek(0, SEEK_SET);
+}
+
+size_t llama_file::tell() const {
 #ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
+    __int64 ret = _ftelli64(fp);
 #else
-        int ret = std::fseek(fp, (long) offset, whence);
+    long ret = std::ftell(fp);
 #endif
-        GGML_ASSERT(ret == 0); // same
-    }
+    GGML_ASSERT(ret != -1); // this really shouldn't fail
+    return (size_t) ret;
+}
 
-    void read_raw(void * ptr, size_t len) const {
-        if (len == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, len, 1, fp);
-        if (ferror(fp)) {
-            throw std::runtime_error(format("read error: %s", strerror(errno)));
-        }
-        if (ret != 1) {
-            throw std::runtime_error("unexpectedly reached end of file");
-        }
-    }
+void llama_file::seek(size_t offset, int whence) const {
+#ifdef _WIN32
+    int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+    int ret = std::fseek(fp, (long) offset, whence);
+#endif
+    GGML_ASSERT(ret == 0); // same
+}
 
-    uint32_t read_u32() const {
-        uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
+void llama_file::read_raw(void * ptr, size_t len) const {
+    if (len == 0) {
+        return;
     }
+    errno = 0;
+    std::size_t ret = std::fread(ptr, len, 1, fp);
+    if (ferror(fp)) {
+        throw std::runtime_error(format("read error: %s", strerror(errno)));
+    }
+    if (ret != 1) {
+        throw std::runtime_error("unexpectedly reached end of file");
+    }
+}
 
-    void write_raw(const void * ptr, size_t len) const {
-        if (len == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, len, 1, fp);
-        if (ret != 1) {
-            throw std::runtime_error(format("write error: %s", strerror(errno)));
-        }
-    }
+uint32_t llama_file::read_u32() const {
+    uint32_t ret;
+    read_raw(&ret, sizeof(ret));
+    return ret;
+}
 
-    void write_u32(std::uint32_t val) const {
-        write_raw(&val, sizeof(val));
-    }
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
+float_t llama_file::read_f32() const {
+    std::float_t ret;
+    read_raw(&ret, sizeof(ret));
+    return ret;
+}
+
+std::string llama_file::read_string(std::uint32_t len) const {
+    std::vector<char> chars(len);
+    read_raw(chars.data(), len);
+    return std::string(chars.data(), len);
+}
+
+void llama_file::write_raw(const void * ptr, size_t len) const {
+    if (len == 0) {
+        return;
     }
-};
+    errno = 0;
+    size_t ret = std::fwrite(ptr, len, 1, fp);
+    if (ret != 1) {
+        throw std::runtime_error(format("write error: %s", strerror(errno)));
+    }
+}
+
+void llama_file::write_u32(std::uint32_t val) const {
+    write_raw(&val, sizeof(val));
+}
+
+bool llama_file::eof() const {
+    return tell() >= size;
+}
+
+llama_file::~llama_file() {
+    if (fp) {
+        std::fclose(fp);
+    }
+}
 
 struct llama_mmap {
     void * addr;
diff --git a/llama.h b/llama.h
index 3dc162b07..35b9fdb05 100644
--- a/llama.h
+++ b/llama.h
@@ -950,6 +950,33 @@ extern "C" {
 }
 #endif
 
+#ifdef __cplusplus
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct llama_file {
+    FILE *fp;
+    size_t size;
+
+    llama_file(const char* fname, const char* mode);
+    ~llama_file();
+
+    size_t tell() const;
+    void seek(size_t offset, int whence) const;
+    void read_raw(void* ptr, size_t len) const;
+    uint32_t read_u32() const;
+    float_t read_f32() const;
+    std::string read_string(std::uint32_t len) const;
+    void write_raw(const void* ptr, size_t len) const;
+    void write_u32(std::uint32_t val) const;
+    bool eof() const;
+};
+#endif
+
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
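
Below is a minimal usage sketch, not part of the patch itself, showing how a caller might exercise the relocated llama_file API as declared in llama.h above. The model path is a hypothetical example of a non-ASCII file name (the case this change targets on Windows, where the constructor widens the UTF-8 path with MultiByteToWideChar and opens it with _wfopen); the magic-number print is only there to demonstrate the read helpers.

    #include "llama.h"

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    int main() {
        try {
            // UTF-8 path with non-ASCII characters (hypothetical file name);
            // on Windows the patched constructor converts it to a wide-char
            // string before opening, on other platforms it falls back to fopen.
            llama_file file("models/céline-7b.gguf", "rb");
            uint32_t magic = file.read_u32(); // first four bytes of the file
            std::printf("size: %zu bytes, magic: 0x%08x\n", file.size, (unsigned) magic);
        } catch (const std::runtime_error & err) {
            // the constructor and read/write helpers throw on failure
            std::fprintf(stderr, "error: %s\n", err.what());
            return 1;
        }
        return 0;
    }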