From a8a922ca18253bb462fb13fec8f1d69cd1265430 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Sun, 10 Mar 2024 22:17:00 -0400
Subject: [PATCH] move repeated llama_file logic to llama.cpp

- allow for opening unicode file names on windows
---
 common/train.cpp                      |  84 ----------
 .../convert-llama2c-to-ggml.cpp       |  73 --------
 examples/export-lora/export-lora.cpp  |  88 +---------
 examples/finetune/finetune.cpp        |  83 ---------
 llama.cpp                             | 158 +++++++++++-------
 llama.h                               |  27 +++
 6 files changed, 122 insertions(+), 391 deletions(-)

diff --git a/common/train.cpp b/common/train.cpp
index 0dbfd24df..e5572a081 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -709,90 +709,6 @@ void save_train_state_gguf(struct gguf_context * fctx, struct train_state * trai
     save_opt_context_gguf(fctx, train->opt);
 }
 
-
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static size_t utf8_len(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(src) >> 4;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 8209dcb64..7fe6e0c2f 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -458,79 +458,6 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("fread failed: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-    std::float_t read_f32() {
-        std::float_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static bool is_ggml_file(const char * filename) {
     llama_file file(filename, "rb");
     if (file.size < 4) {
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 08413f57e..82476cf37 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -2,6 +2,7 @@
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
+#include "llama.h"
 
 #include 
 #include 
@@ -28,93 +29,6 @@ struct lora_data {
     uint32_t lora_alpha;
 };
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    bool eof() {
-        return tell() >= size;
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static struct export_lora_params get_default_export_lora_params() {
     struct export_lora_params result;
     result.fn_model_base = "";
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 3da5317b3..91d552a63 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -946,89 +946,6 @@ static void save_checkpoint_lora_file(const char * filename, struct my_llama_mod
     gguf_free(fctx);
 }
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            size = 0;
-        } else {
-            seek(0, SEEK_END);
-            size = tell();
-            seek(0, SEEK_SET);
-        }
-    }
-
-    size_t tell() const {
-#ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
-    }
-
-    void seek(size_t offset, int whence) {
-#ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-        int ret = std::fseek(fp, (long) offset, whence);
-#endif
-        GGML_ASSERT(ret == 0); // same
-    }
-
-    void read_raw(void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, size, 1, fp);
-        if (ferror(fp)) {
-            die_fmt("read error: %s", strerror(errno));
-        }
-        if (ret != 1) {
-            die("unexpectedly reached end of file");
-        }
-    }
-
-    std::uint32_t read_u32() {
-        std::uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
-    }
-
-    std::string read_string(std::uint32_t len) {
-        std::vector<char> chars(len);
-        read_raw(chars.data(), len);
-        return std::string(chars.data(), len);
-    }
-
-    void write_raw(const void * ptr, size_t size) {
-        if (size == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, size, 1, fp);
-        if (ret != 1) {
-            die_fmt("write error: %s", strerror(errno));
-        }
-    }
-
-    void write_u32(std::uint32_t val) {
-        write_raw(&val, sizeof(val));
-    }
-
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
-    }
-};
-
 static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor, const char * name) {
     if (tensor == NULL) {
         file->write_u32(0);
diff --git a/llama.cpp b/llama.cpp
index 4225f9555..1dc0fa187 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -65,8 +65,6 @@
 #include 
 #include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -982,81 +980,113 @@ struct no_init {
     no_init() { /* do nothing */ }
 };
 
-struct llama_file {
-    // use FILE * so we don't have to re-open the file to mmap
-    FILE * fp;
-    size_t size;
-
-    llama_file(const char * fname, const char * mode) {
-        fp = std::fopen(fname, mode);
-        if (fp == NULL) {
-            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
-        }
-        seek(0, SEEK_END);
-        size = tell();
-        seek(0, SEEK_SET);
-    }
-
-    size_t tell() const {
+llama_file::llama_file(const char * fname, const char * mode) {
 #ifdef _WIN32
-        __int64 ret = _ftelli64(fp);
-#else
-        long ret = std::ftell(fp);
-#endif
-        GGML_ASSERT(ret != -1); // this really shouldn't fail
-        return (size_t) ret;
+    // temporarily change the locale to the system default to handle Unicode file names
+    std::string oldLocale = std::setlocale(LC_ALL, nullptr);
+    std::setlocale(LC_ALL, "");
+
+    // convert multi-byte string to wide-char string
+    int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
+    std::vector<wchar_t> wfname(wsize);
+    MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
+
+    // determine the correct wide-character mode string
+    std::wstring wmode;
+    for(; *mode; ++mode) {
+        wmode += wchar_t(*mode);
     }
 
-    void seek(size_t offset, int whence) const {
+    fp = _wfopen(wfname.data(), wmode.c_str());
+
+    std::setlocale(LC_ALL, oldLocale.c_str());
+#else
+    fp = fopen(fname, mode);
+#endif
+    if (fp == NULL) {
+        throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+    }
+    seek(0, SEEK_END);
+    size = tell();
+    seek(0, SEEK_SET);
+}
+
+size_t llama_file::tell() const {
 #ifdef _WIN32
-        int ret = _fseeki64(fp, (__int64) offset, whence);
+    __int64 ret = _ftelli64(fp);
 #else
-        int ret = std::fseek(fp, (long) offset, whence);
+    long ret = std::ftell(fp);
 #endif
-        GGML_ASSERT(ret == 0); // same
-    }
+    GGML_ASSERT(ret != -1); // this really shouldn't fail
+    return (size_t) ret;
+}
 
-    void read_raw(void * ptr, size_t len) const {
-        if (len == 0) {
-            return;
-        }
-        errno = 0;
-        std::size_t ret = std::fread(ptr, len, 1, fp);
-        if (ferror(fp)) {
-            throw std::runtime_error(format("read error: %s", strerror(errno)));
-        }
-        if (ret != 1) {
-            throw std::runtime_error("unexpectedly reached end of file");
-        }
-    }
+void llama_file::seek(size_t offset, int whence) const {
+#ifdef _WIN32
+    int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+    int ret = std::fseek(fp, (long) offset, whence);
+#endif
+    GGML_ASSERT(ret == 0); // same
+}
 
-    uint32_t read_u32() const {
-        uint32_t ret;
-        read_raw(&ret, sizeof(ret));
-        return ret;
+void llama_file::read_raw(void * ptr, size_t len) const {
+    if (len == 0) {
+        return;
     }
+    errno = 0;
+    std::size_t ret = std::fread(ptr, len, 1, fp);
+    if (ferror(fp)) {
+        throw std::runtime_error(format("read error: %s", strerror(errno)));
+    }
+    if (ret != 1) {
+        throw std::runtime_error("unexpectedly reached end of file");
+    }
+}
 
-    void write_raw(const void * ptr, size_t len) const {
-        if (len == 0) {
-            return;
-        }
-        errno = 0;
-        size_t ret = std::fwrite(ptr, len, 1, fp);
-        if (ret != 1) {
-            throw std::runtime_error(format("write error: %s", strerror(errno)));
-        }
-    }
+uint32_t llama_file::read_u32() const {
+    uint32_t ret;
+    read_raw(&ret, sizeof(ret));
+    return ret;
+}
 
-    void write_u32(std::uint32_t val) const {
-        write_raw(&val, sizeof(val));
-    }
-    ~llama_file() {
-        if (fp) {
-            std::fclose(fp);
-        }
+float_t llama_file::read_f32() const {
+    std::float_t ret;
+    read_raw(&ret, sizeof(ret));
+    return ret;
+}
+
+std::string llama_file::read_string(std::uint32_t len) const {
+    std::vector<char> chars(len);
+    read_raw(chars.data(), len);
+    return std::string(chars.data(), len);
+}
+
+void llama_file::write_raw(const void * ptr, size_t len) const {
+    if (len == 0) {
+        return;
     }
-};
+    errno = 0;
+    size_t ret = std::fwrite(ptr, len, 1, fp);
+    if (ret != 1) {
+        throw std::runtime_error(format("write error: %s", strerror(errno)));
+    }
+}
+
+void llama_file::write_u32(std::uint32_t val) const {
+    write_raw(&val, sizeof(val));
+}
+
+bool llama_file::eof() const {
+    return tell() >= size;
+}
+
+llama_file::~llama_file() {
+    if (fp) {
+        std::fclose(fp);
+    }
+}
 
 struct llama_mmap {
     void * addr;
diff --git a/llama.h b/llama.h
index 3dc162b07..35b9fdb05 100644
--- a/llama.h
+++ b/llama.h
@@ -950,6 +950,33 @@ extern "C" {
 }
 #endif
 
+#ifdef __cplusplus
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct llama_file {
+    FILE *fp;
+    size_t size;
+
+    llama_file(const char* fname, const char* mode);
+    ~llama_file();
+
+    size_t tell() const;
+    void seek(size_t offset, int whence) const;
+    void read_raw(void* ptr, size_t len) const;
+    uint32_t read_u32() const;
+    float_t read_f32() const;
+    std::string read_string(std::uint32_t len) const;
+    void write_raw(const void* ptr, size_t len) const;
+    void write_u32(std::uint32_t val) const;
+    bool eof() const;
+};
+#endif
+
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
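
Below is a minimal usage sketch, not part of the patch itself, showing how a caller might exercise the relocated llama_file API as declared in llama.h above. The model path is a hypothetical example of a non-ASCII file name (the case this change targets on Windows, where the constructor widens the UTF-8 path with MultiByteToWideChar and opens it with _wfopen); the magic-number print is only there to demonstrate the read helpers.

    #include "llama.h"

    #include <cstdint>
    #include <cstdio>
    #include <stdexcept>

    int main() {
        try {
            // UTF-8 path with non-ASCII characters (hypothetical file name);
            // on Windows the patched constructor converts it to a wide-char
            // string before opening, on other platforms it falls back to fopen.
            llama_file file("models/céline-7b.gguf", "rb");
            uint32_t magic = file.read_u32(); // first four bytes of the file
            std::printf("size: %zu bytes, magic: 0x%08x\n", file.size, (unsigned) magic);
        } catch (const std::runtime_error & err) {
            // the constructor and read/write helpers throw on failure
            std::fprintf(stderr, "error: %s\n", err.what());
            return 1;
        }
        return 0;
    }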