diff --git a/common/train.cpp b/common/train.cpp
index e5572a081..0dbfd24df 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -709,6 +709,90 @@ void save_train_state_gguf(struct gguf_context * fctx, struct train_state * trai
     save_opt_context_gguf(fctx, train->opt);
 }
 
+
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static size_t utf8_len(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(src) >> 4;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 7fe6e0c2f..8209dcb64 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -458,6 +458,79 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("fread failed: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static bool is_ggml_file(const char * filename) {
     llama_file file(filename, "rb");
     if (file.size < 4) {
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 82476cf37..08413f57e 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -2,7 +2,6 @@
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
-#include "llama.h"
 
 #include
 #include
@@ -29,6 +28,93 @@ struct lora_data {
     uint32_t lora_alpha;
 };
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    bool eof() {
+        return tell() >= size;
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static struct export_lora_params get_default_export_lora_params() {
     struct export_lora_params result;
     result.fn_model_base = "";
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 91d552a63..3da5317b3 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -946,6 +946,89 @@ static void save_checkpoint_lora_file(const char * filename, struct my_llama_mod
     gguf_free(fctx);
 }
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor, const char * name) {
     if (tensor == NULL) {
         file->write_u32(0);
diff --git a/llama.cpp b/llama.cpp
index 1dc0fa187..4225f9555 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -65,6 +65,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -980,113 +982,81 @@ struct no_init {
     no_init() { /* do nothing */ }
 };
 
-llama_file::llama_file(const char * fname, const char * mode) {
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+        }
+        seek(0, SEEK_END);
+        size = tell();
+        seek(0, SEEK_SET);
+    }
+
+    size_t tell() const {
 #ifdef _WIN32
-    // temporarily change the locale to the system default to handle Unicode file names
-    std::string oldLocale = std::setlocale(LC_ALL, nullptr);
-    std::setlocale(LC_ALL, "");
-
-    // convert multi-byte string to wide-char string
-    int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
-    std::vector<wchar_t> wfname(wsize);
-    MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
-
-    // determine the correct wide-character mode string
-    std::wstring wmode;
-    for(; *mode; ++mode) {
-        wmode += wchar_t(*mode);
-    }
-
-    fp = _wfopen(wfname.data(), wmode.c_str());
-
-    std::setlocale(LC_ALL, oldLocale.c_str());
+        __int64 ret = _ftelli64(fp);
 #else
-    fp = fopen(fname, mode);
+        long ret = std::ftell(fp);
 #endif
-    if (fp == NULL) {
-        throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
     }
-    seek(0, SEEK_END);
-    size = tell();
-    seek(0, SEEK_SET);
-}
 
-size_t llama_file::tell() const {
+    void seek(size_t offset, int whence) const {
 #ifdef _WIN32
-    __int64 ret = _ftelli64(fp);
+        int ret = _fseeki64(fp, (__int64) offset, whence);
 #else
-    long ret = std::ftell(fp);
+        int ret = std::fseek(fp, (long) offset, whence);
 #endif
-    GGML_ASSERT(ret != -1); // this really shouldn't fail
-    return (size_t) ret;
-}
-
-void llama_file::seek(size_t offset, int whence) const {
-#ifdef _WIN32
-    int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-    int ret = std::fseek(fp, (long) offset, whence);
-#endif
-    GGML_ASSERT(ret == 0); // same
-}
-
-void llama_file::read_raw(void * ptr, size_t len) const {
-    if (len == 0) {
-        return;
+        GGML_ASSERT(ret == 0); // same
     }
-    errno = 0;
-    std::size_t ret = std::fread(ptr, len, 1, fp);
-    if (ferror(fp)) {
-        throw std::runtime_error(format("read error: %s", strerror(errno)));
+
+    void read_raw(void * ptr, size_t len) const {
+        if (len == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, len, 1, fp);
+        if (ferror(fp)) {
+            throw std::runtime_error(format("read error: %s", strerror(errno)));
+        }
+        if (ret != 1) {
+            throw std::runtime_error("unexpectedly reached end of file");
+        }
     }
-    if (ret != 1) {
-        throw std::runtime_error("unexpectedly reached end of file");
+
+    uint32_t read_u32() const {
+        uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
     }
-}
 
-uint32_t llama_file::read_u32() const {
-    uint32_t ret;
-    read_raw(&ret, sizeof(ret));
-    return ret;
-}
-
-
-float_t llama_file::read_f32() const {
-    std::float_t ret;
-    read_raw(&ret, sizeof(ret));
-    return ret;
-}
-
-std::string llama_file::read_string(std::uint32_t len) const {
-    std::vector<char> chars(len);
-    read_raw(chars.data(), len);
-    return std::string(chars.data(), len);
-}
-
-void llama_file::write_raw(const void * ptr, size_t len) const {
-    if (len == 0) {
-        return;
+    void write_raw(const void * ptr, size_t len) const {
+        if (len == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, len, 1, fp);
+        if (ret != 1) {
+            throw std::runtime_error(format("write error: %s", strerror(errno)));
+        }
     }
-    errno = 0;
-    size_t ret = std::fwrite(ptr, len, 1, fp);
-    if (ret != 1) {
-        throw std::runtime_error(format("write error: %s", strerror(errno)));
+
+    void write_u32(std::uint32_t val) const {
+        write_raw(&val, sizeof(val));
     }
-}
 
-void llama_file::write_u32(std::uint32_t val) const {
-    write_raw(&val, sizeof(val));
-}
-
-bool llama_file::eof() const {
-    return tell() >= size;
-}
-
-llama_file::~llama_file() {
-    if (fp) {
-        std::fclose(fp);
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
     }
-}
+};
 
 struct llama_mmap {
     void * addr;
diff --git a/llama.h b/llama.h
index 35b9fdb05..3dc162b07 100644
--- a/llama.h
+++ b/llama.h
@@ -950,33 +950,6 @@ extern "C" {
 }
 #endif
 
-#ifdef __cplusplus
-#include
-#include
-#include
-#include
-#include
-#include
-
-struct llama_file {
-    FILE *fp;
-    size_t size;
-
-    llama_file(const char* fname, const char* mode);
-    ~llama_file();
-
-    size_t tell() const;
-    void seek(size_t offset, int whence) const;
-    void read_raw(void* ptr, size_t len) const;
-    uint32_t read_u32() const;
-    float_t read_f32() const;
-    std::string read_string(std::uint32_t len) const;
-    void write_raw(const void* ptr, size_t len) const;
-    void write_u32(std::uint32_t val) const;
-    bool eof() const;
-};
-#endif
-
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
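
Usage note (not part of the patch): every per-example llama_file copy above is driven the same way — construct it, check size, then read typed values. A minimal sketch follows, assuming it sits in a translation unit that already contains one of the copies (the helper name has_gguf_magic and the "GGUF" comparison are illustrative, mirroring the is_ggml_file() context shown in convert-llama2c-to-ggml.cpp):

// Minimal sketch only: probe a file for the 4-byte GGUF magic using the
// llama_file copy defined earlier in the same translation unit.
static bool has_gguf_magic(const char * filename) {
    llama_file file(filename, "rb");          // size stays 0 if fopen failed
    if (file.size < 4) {
        return false;                         // too short to hold the magic
    }
    std::string magic = file.read_string(4);  // read the first four bytes
    return magic == "GGUF";                   // GGUF files start with this magic
}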