diff --git a/common/train.cpp b/common/train.cpp
index e5572a081..0dbfd24df 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -709,6 +709,90 @@ void save_train_state_gguf(struct gguf_context * fctx, struct train_state * trai
     save_opt_context_gguf(fctx, train->opt);
 }
 
+
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static size_t utf8_len(char src) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(src) >> 4;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 7fe6e0c2f..8209dcb64 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -458,6 +458,79 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("fread failed: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static bool is_ggml_file(const char * filename) {
     llama_file file(filename, "rb");
     if (file.size < 4) {
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 82476cf37..08413f57e 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -2,7 +2,6 @@
 #include "common.h"
 #include "ggml.h"
 #include "ggml-alloc.h"
-#include "llama.h"
 
 #include
 #include
@@ -29,6 +28,93 @@ struct lora_data {
     uint32_t lora_alpha;
 };
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    bool eof() {
+        return tell() >= size;
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static struct export_lora_params get_default_export_lora_params() {
     struct export_lora_params result;
     result.fn_model_base = "";
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 91d552a63..3da5317b3 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -946,6 +946,89 @@ static void save_checkpoint_lora_file(const char * filename, struct my_llama_mod
     gguf_free(fctx);
 }
 
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            size = 0;
+        } else {
+            seek(0, SEEK_END);
+            size = tell();
+            seek(0, SEEK_SET);
+        }
+    }
+
+    size_t tell() const {
+#ifdef _WIN32
+        __int64 ret = _ftelli64(fp);
+#else
+        long ret = std::ftell(fp);
+#endif
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
+    }
+
+    void seek(size_t offset, int whence) {
+#ifdef _WIN32
+        int ret = _fseeki64(fp, (__int64) offset, whence);
+#else
+        int ret = std::fseek(fp, (long) offset, whence);
+#endif
+        GGML_ASSERT(ret == 0); // same
+    }
+
+    void read_raw(void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, size, 1, fp);
+        if (ferror(fp)) {
+            die_fmt("read error: %s", strerror(errno));
+        }
+        if (ret != 1) {
+            die("unexpectedly reached end of file");
+        }
+    }
+
+    std::uint32_t read_u32() {
+        std::uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
+
+    std::string read_string(std::uint32_t len) {
+        std::vector<char> chars(len);
+        read_raw(chars.data(), len);
+        return std::string(chars.data(), len);
+    }
+
+    void write_raw(const void * ptr, size_t size) {
+        if (size == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, size, 1, fp);
+        if (ret != 1) {
+            die_fmt("write error: %s", strerror(errno));
+        }
+    }
+
+    void write_u32(std::uint32_t val) {
+        write_raw(&val, sizeof(val));
+    }
+
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
+    }
+};
+
 static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor, const char * name) {
     if (tensor == NULL) {
         file->write_u32(0);
diff --git a/llama.cpp b/llama.cpp
index 1dc0fa187..4225f9555 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -65,6 +65,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -980,113 +982,81 @@ struct no_init {
     no_init() { /* do nothing */ }
 };
 
-llama_file::llama_file(const char * fname, const char * mode) {
+struct llama_file {
+    // use FILE * so we don't have to re-open the file to mmap
+    FILE * fp;
+    size_t size;
+
+    llama_file(const char * fname, const char * mode) {
+        fp = std::fopen(fname, mode);
+        if (fp == NULL) {
+            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+        }
+        seek(0, SEEK_END);
+        size = tell();
+        seek(0, SEEK_SET);
+    }
+
+    size_t tell() const {
 #ifdef _WIN32
-    // temporarily change the locale to the system default to handle Unicode file names
-    std::string oldLocale = std::setlocale(LC_ALL, nullptr);
-    std::setlocale(LC_ALL, "");
-
-    // convert multi-byte string to wide-char string
-    int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
-    std::vector<wchar_t> wfname(wsize);
-    MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
-
-    // determine the correct wide-character mode string
-    std::wstring wmode;
-    for(; *mode; ++mode) {
-        wmode += wchar_t(*mode);
-    }
-
-    fp = _wfopen(wfname.data(), wmode.c_str());
-
-    std::setlocale(LC_ALL, oldLocale.c_str());
+        __int64 ret = _ftelli64(fp);
 #else
-    fp = fopen(fname, mode);
+        long ret = std::ftell(fp);
 #endif
-    if (fp == NULL) {
-        throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+        GGML_ASSERT(ret != -1); // this really shouldn't fail
+        return (size_t) ret;
     }
-    seek(0, SEEK_END);
-    size = tell();
-    seek(0, SEEK_SET);
-}
 
-size_t llama_file::tell() const {
+    void seek(size_t offset, int whence) const {
 #ifdef _WIN32
-    __int64 ret = _ftelli64(fp);
+        int ret = _fseeki64(fp, (__int64) offset, whence);
 #else
-    long ret = std::ftell(fp);
+        int ret = std::fseek(fp, (long) offset, whence);
 #endif
-    GGML_ASSERT(ret != -1); // this really shouldn't fail
-    return (size_t) ret;
-}
-
-void llama_file::seek(size_t offset, int whence) const {
-#ifdef _WIN32
-    int ret = _fseeki64(fp, (__int64) offset, whence);
-#else
-    int ret = std::fseek(fp, (long) offset, whence);
-#endif
-    GGML_ASSERT(ret == 0); // same
-}
-
-void llama_file::read_raw(void * ptr, size_t len) const {
-    if (len == 0) {
-        return;
+        GGML_ASSERT(ret == 0); // same
     }
-    errno = 0;
-    std::size_t ret = std::fread(ptr, len, 1, fp);
-    if (ferror(fp)) {
-        throw std::runtime_error(format("read error: %s", strerror(errno)));
+
+    void read_raw(void * ptr, size_t len) const {
+        if (len == 0) {
+            return;
+        }
+        errno = 0;
+        std::size_t ret = std::fread(ptr, len, 1, fp);
+        if (ferror(fp)) {
+            throw std::runtime_error(format("read error: %s", strerror(errno)));
+        }
+        if (ret != 1) {
+            throw std::runtime_error("unexpectedly reached end of file");
+        }
     }
-    if (ret != 1) {
-        throw std::runtime_error("unexpectedly reached end of file");
+
+    uint32_t read_u32() const {
+        uint32_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
     }
-}
 
-uint32_t llama_file::read_u32() const {
-    uint32_t ret;
-    read_raw(&ret, sizeof(ret));
-    return ret;
-}
-
-
-float_t llama_file::read_f32() const {
-    std::float_t ret;
-    read_raw(&ret, sizeof(ret));
-    return ret;
-}
-
-std::string llama_file::read_string(std::uint32_t len) const {
-    std::vector<char> chars(len);
-    read_raw(chars.data(), len);
-    return std::string(chars.data(), len);
-}
-
-void llama_file::write_raw(const void * ptr, size_t len) const {
-    if (len == 0) {
-        return;
+    void write_raw(const void * ptr, size_t len) const {
+        if (len == 0) {
+            return;
+        }
+        errno = 0;
+        size_t ret = std::fwrite(ptr, len, 1, fp);
+        if (ret != 1) {
+            throw std::runtime_error(format("write error: %s", strerror(errno)));
+        }
     }
-    errno = 0;
-    size_t ret = std::fwrite(ptr, len, 1, fp);
-    if (ret != 1) {
-        throw std::runtime_error(format("write error: %s", strerror(errno)));
+
+    void write_u32(std::uint32_t val) const {
+        write_raw(&val, sizeof(val));
     }
-}
 
-void llama_file::write_u32(std::uint32_t val) const {
-    write_raw(&val, sizeof(val));
-}
-
-bool llama_file::eof() const {
-    return tell() >= size;
-}
-
-llama_file::~llama_file() {
-    if (fp) {
-        std::fclose(fp);
+    ~llama_file() {
+        if (fp) {
+            std::fclose(fp);
+        }
     }
-}
+};
 
 struct llama_mmap {
     void * addr;
diff --git a/llama.h b/llama.h
index 35b9fdb05..3dc162b07 100644
--- a/llama.h
+++ b/llama.h
@@ -950,33 +950,6 @@ extern "C" {
 }
 #endif
 
-#ifdef __cplusplus
-#include
-#include
-#include
-#include
-#include
-#include
-
-struct llama_file {
-    FILE *fp;
-    size_t size;
-
-    llama_file(const char* fname, const char* mode);
-    ~llama_file();
-
-    size_t tell() const;
-    void seek(size_t offset, int whence) const;
-    void read_raw(void* ptr, size_t len) const;
-    uint32_t read_u32() const;
-    float_t read_f32() const;
-    std::string read_string(std::uint32_t len) const;
-    void write_raw(const void* ptr, size_t len) const;
-    void write_u32(std::uint32_t val) const;
-    bool eof() const;
-};
-#endif
-
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL
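
Usage note (not part of the patch): every per-example llama_file copy above is driven the same way — construct it, check size, then read typed values. A minimal sketch follows, assuming it sits in a translation unit that already contains one of the copies (the helper name has_gguf_magic and the "GGUF" comparison are illustrative, mirroring the is_ggml_file() context shown in convert-llama2c-to-ggml.cpp):

// Minimal sketch only: probe a file for the 4-byte GGUF magic using the
// llama_file copy defined earlier in the same translation unit.
static bool has_gguf_magic(const char * filename) {
    llama_file file(filename, "rb");          // size stays 0 if fopen failed
    if (file.size < 4) {
        return false;                         // too short to hold the magic
    }
    std::string magic = file.read_string(4);  // read the first four bytes
    return magic == "GGUF";                   // GGUF files start with this magic
}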