Revert "move repeated llama_file logic to llama.cpp"

This reverts commit a8a922ca18.
This commit is contained in:
Bruce MacDonald 2024-03-11 10:55:38 -04:00
parent 5125896beb
commit 8a11598b1e
6 changed files with 388 additions and 119 deletions

View file

@ -709,6 +709,90 @@ void save_train_state_gguf(struct gguf_context * fctx, struct train_state * trai
save_opt_context_gguf(fctx, train->opt);
}
// Owning wrapper around a C stdio stream with small binary read/write helpers.
// The stream is opened in the constructor and closed in the destructor. If the
// open fails, fp stays null and size stays 0; no error is raised here, so
// callers must check before using the other members.
struct llama_file {
    // use FILE * so we don't have to re-open the file to mmap
    FILE * fp = nullptr;
    // file size in bytes, measured once when the file is opened
    size_t size = 0;

    // Opens fname with the given fopen() mode and records the file size by
    // seeking to the end and then rewinding to the start.
    llama_file(const char * fname, const char * mode) {
        fp = std::fopen(fname, mode);
        if (fp == nullptr) {
            return; // size keeps its default of 0
        }
        seek(0, SEEK_END);
        size = tell();
        seek(0, SEEK_SET);
    }

    // fp is closed by the destructor, so a copy would make two objects close
    // the same stream. Copying is disabled to rule that out.
    llama_file(const llama_file &) = delete;
    llama_file & operator=(const llama_file &) = delete;

    // Returns the current stream position as reported by ftell/_ftelli64.
    size_t tell() const {
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        GGML_ASSERT(ret != -1); // this really shouldn't fail
        return (size_t) ret;
    }

    // Repositions the stream; offset and whence are forwarded to fseek/_fseeki64.
    void seek(size_t offset, int whence) {
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        GGML_ASSERT(ret == 0); // same
    }

    // Reads exactly `size` bytes into ptr (the parameter shadows the member of
    // the same name). A zero-length read is a no-op. Calls die_fmt() when the
    // stream reports an error and die() when the full amount was not available.
    void read_raw(void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this read only
        std::size_t ret = std::fread(ptr, size, 1, fp);
        if (ferror(fp)) {
            die_fmt("read error: %s", strerror(errno));
        }
        if (ret != 1) {
            die("unexpectedly reached end of file");
        }
    }

    // Reads sizeof(std::uint32_t) raw bytes and returns them as an integer
    // (no byte-order conversion is performed).
    std::uint32_t read_u32() {
        std::uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Reads len raw bytes and returns them as a std::string.
    std::string read_string(std::uint32_t len) {
        std::vector<char> chars(len);
        read_raw(chars.data(), len);
        return std::string(chars.data(), len);
    }

    // Writes exactly `size` bytes from ptr. A zero-length write is a no-op.
    // Calls die_fmt() when fwrite does not report the full item as written.
    void write_raw(const void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this write only
        size_t ret = std::fwrite(ptr, size, 1, fp);
        if (ret != 1) {
            die_fmt("write error: %s", strerror(errno));
        }
    }

    // Writes the raw bytes of val (no byte-order conversion is performed).
    void write_u32(std::uint32_t val) {
        write_raw(&val, sizeof(val));
    }

    // Closes the stream if it was opened successfully.
    ~llama_file() {
        if (fp) {
            std::fclose(fp);
        }
    }
};
static size_t utf8_len(char src) {
const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
uint8_t highbits = static_cast<uint8_t>(src) >> 4;

View file

@ -458,6 +458,79 @@ static std::string format(const char * fmt, ...) {
return std::string(buf.data(), size);
}
// Owning, read-oriented wrapper around a C stdio stream. The stream is opened
// in the constructor and closed in the destructor. If the open fails, fp stays
// null and size stays 0; no error is raised here, so callers must check (for
// example by inspecting size) before reading.
struct llama_file {
    // use FILE * so we don't have to re-open the file to mmap
    FILE * fp = nullptr;
    // file size in bytes, measured once when the file is opened
    size_t size = 0;

    // Opens fname with the given fopen() mode and records the file size by
    // seeking to the end and then rewinding to the start.
    llama_file(const char * fname, const char * mode) {
        fp = std::fopen(fname, mode);
        if (fp == nullptr) {
            return; // size keeps its default of 0
        }
        seek(0, SEEK_END);
        size = tell();
        seek(0, SEEK_SET);
    }

    // fp is closed by the destructor, so a copy would make two objects close
    // the same stream. Copying is disabled to rule that out.
    llama_file(const llama_file &) = delete;
    llama_file & operator=(const llama_file &) = delete;

    // Returns the current stream position as reported by ftell/_ftelli64.
    size_t tell() const {
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        GGML_ASSERT(ret != -1); // this really shouldn't fail
        return (size_t) ret;
    }

    // Repositions the stream; offset and whence are forwarded to fseek/_fseeki64.
    void seek(size_t offset, int whence) {
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        GGML_ASSERT(ret == 0); // same
    }

    // Reads exactly `size` bytes into ptr (the parameter shadows the member of
    // the same name). A zero-length read is a no-op. Calls die_fmt() when the
    // stream reports an error and die() when the full amount was not available.
    void read_raw(void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this read only
        std::size_t ret = std::fread(ptr, size, 1, fp);
        if (ferror(fp)) {
            die_fmt("fread failed: %s", strerror(errno));
        }
        if (ret != 1) {
            die("unexpectedly reached end of file");
        }
    }

    // Reads sizeof(std::uint32_t) raw bytes and returns them as an integer
    // (no byte-order conversion is performed).
    std::uint32_t read_u32() {
        std::uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Reads one `float` worth of raw bytes and returns the value.
    // The buffer is deliberately `float`, not std::float_t: std::float_t may be
    // a wider type (double / long double) depending on FLT_EVAL_METHOD, which
    // would consume more bytes from the stream than one stored float.
    std::float_t read_f32() {
        float ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Reads len raw bytes and returns them as a std::string.
    std::string read_string(std::uint32_t len) {
        std::vector<char> chars(len);
        read_raw(chars.data(), len);
        return std::string(chars.data(), len);
    }

    // Closes the stream if it was opened successfully.
    ~llama_file() {
        if (fp) {
            std::fclose(fp);
        }
    }
};
static bool is_ggml_file(const char * filename) {
llama_file file(filename, "rb");
if (file.size < 4) {

View file

@ -2,7 +2,6 @@
#include "common.h"
#include "ggml.h"
#include "ggml-alloc.h"
#include "llama.h"
#include <vector>
#include <string>
@ -29,6 +28,93 @@ struct lora_data {
uint32_t lora_alpha;
};
// Owning wrapper around a C stdio stream with small binary read/write helpers.
// The stream is opened in the constructor and closed in the destructor. If the
// open fails, fp stays null and size stays 0; no error is raised here, so
// callers must check before using the other members.
struct llama_file {
    // use FILE * so we don't have to re-open the file to mmap
    FILE * fp = nullptr;
    // file size in bytes, measured once when the file is opened
    size_t size = 0;

    // Opens fname with the given fopen() mode and records the file size by
    // seeking to the end and then rewinding to the start.
    llama_file(const char * fname, const char * mode) {
        fp = std::fopen(fname, mode);
        if (fp == nullptr) {
            return; // size keeps its default of 0
        }
        seek(0, SEEK_END);
        size = tell();
        seek(0, SEEK_SET);
    }

    // fp is closed by the destructor, so a copy would make two objects close
    // the same stream. Copying is disabled to rule that out.
    llama_file(const llama_file &) = delete;
    llama_file & operator=(const llama_file &) = delete;

    // Returns the current stream position as reported by ftell/_ftelli64.
    size_t tell() const {
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        GGML_ASSERT(ret != -1); // this really shouldn't fail
        return (size_t) ret;
    }

    // Repositions the stream; offset and whence are forwarded to fseek/_fseeki64.
    void seek(size_t offset, int whence) {
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        GGML_ASSERT(ret == 0); // same
    }

    // Reads exactly `size` bytes into ptr (the parameter shadows the member of
    // the same name). A zero-length read is a no-op. Calls die_fmt() when the
    // stream reports an error and die() when the full amount was not available.
    void read_raw(void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this read only
        std::size_t ret = std::fread(ptr, size, 1, fp);
        if (ferror(fp)) {
            die_fmt("read error: %s", strerror(errno));
        }
        if (ret != 1) {
            die("unexpectedly reached end of file");
        }
    }

    // Reads sizeof(std::uint32_t) raw bytes and returns them as an integer
    // (no byte-order conversion is performed).
    std::uint32_t read_u32() {
        std::uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Reads len raw bytes and returns them as a std::string.
    std::string read_string(std::uint32_t len) {
        std::vector<char> chars(len);
        read_raw(chars.data(), len);
        return std::string(chars.data(), len);
    }

    // Writes exactly `size` bytes from ptr. A zero-length write is a no-op.
    // Calls die_fmt() when fwrite does not report the full item as written.
    void write_raw(const void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this write only
        size_t ret = std::fwrite(ptr, size, 1, fp);
        if (ret != 1) {
            die_fmt("write error: %s", strerror(errno));
        }
    }

    // Writes the raw bytes of val (no byte-order conversion is performed).
    void write_u32(std::uint32_t val) {
        write_raw(&val, sizeof(val));
    }

    // True when the current position is at or past the size recorded at open
    // time. Const because it only queries the position through tell().
    bool eof() const {
        return tell() >= size;
    }

    // Closes the stream if it was opened successfully.
    ~llama_file() {
        if (fp) {
            std::fclose(fp);
        }
    }
};
static struct export_lora_params get_default_export_lora_params() {
struct export_lora_params result;
result.fn_model_base = "";

View file

@ -946,6 +946,89 @@ static void save_checkpoint_lora_file(const char * filename, struct my_llama_mod
gguf_free(fctx);
}
// Owning wrapper around a C stdio stream with small binary read/write helpers.
// The stream is opened in the constructor and closed in the destructor. If the
// open fails, fp stays null and size stays 0; no error is raised here, so
// callers must check before using the other members.
struct llama_file {
    // use FILE * so we don't have to re-open the file to mmap
    FILE * fp = nullptr;
    // file size in bytes, measured once when the file is opened
    size_t size = 0;

    // Opens fname with the given fopen() mode and records the file size by
    // seeking to the end and then rewinding to the start.
    llama_file(const char * fname, const char * mode) {
        fp = std::fopen(fname, mode);
        if (fp == nullptr) {
            return; // size keeps its default of 0
        }
        seek(0, SEEK_END);
        size = tell();
        seek(0, SEEK_SET);
    }

    // fp is closed by the destructor, so a copy would make two objects close
    // the same stream. Copying is disabled to rule that out.
    llama_file(const llama_file &) = delete;
    llama_file & operator=(const llama_file &) = delete;

    // Returns the current stream position as reported by ftell/_ftelli64.
    size_t tell() const {
#ifdef _WIN32
        __int64 ret = _ftelli64(fp);
#else
        long ret = std::ftell(fp);
#endif
        GGML_ASSERT(ret != -1); // this really shouldn't fail
        return (size_t) ret;
    }

    // Repositions the stream; offset and whence are forwarded to fseek/_fseeki64.
    void seek(size_t offset, int whence) {
#ifdef _WIN32
        int ret = _fseeki64(fp, (__int64) offset, whence);
#else
        int ret = std::fseek(fp, (long) offset, whence);
#endif
        GGML_ASSERT(ret == 0); // same
    }

    // Reads exactly `size` bytes into ptr (the parameter shadows the member of
    // the same name). A zero-length read is a no-op. Calls die_fmt() when the
    // stream reports an error and die() when the full amount was not available.
    void read_raw(void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this read only
        std::size_t ret = std::fread(ptr, size, 1, fp);
        if (ferror(fp)) {
            die_fmt("read error: %s", strerror(errno));
        }
        if (ret != 1) {
            die("unexpectedly reached end of file");
        }
    }

    // Reads sizeof(std::uint32_t) raw bytes and returns them as an integer
    // (no byte-order conversion is performed).
    std::uint32_t read_u32() {
        std::uint32_t ret;
        read_raw(&ret, sizeof(ret));
        return ret;
    }

    // Reads len raw bytes and returns them as a std::string.
    std::string read_string(std::uint32_t len) {
        std::vector<char> chars(len);
        read_raw(chars.data(), len);
        return std::string(chars.data(), len);
    }

    // Writes exactly `size` bytes from ptr. A zero-length write is a no-op.
    // Calls die_fmt() when fwrite does not report the full item as written.
    void write_raw(const void * ptr, size_t size) {
        if (size == 0) {
            return;
        }
        errno = 0; // so strerror(errno) below reflects this write only
        size_t ret = std::fwrite(ptr, size, 1, fp);
        if (ret != 1) {
            die_fmt("write error: %s", strerror(errno));
        }
    }

    // Writes the raw bytes of val (no byte-order conversion is performed).
    void write_u32(std::uint32_t val) {
        write_raw(&val, sizeof(val));
    }

    // Closes the stream if it was opened successfully.
    ~llama_file() {
        if (fp) {
            std::fclose(fp);
        }
    }
};
static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor, const char * name) {
if (tensor == NULL) {
file->write_u32(0);

View file

@ -65,6 +65,8 @@
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <cwctype>
#include <forward_list>
@ -980,29 +982,13 @@ struct no_init {
no_init() { /* do nothing */ }
};
llama_file::llama_file(const char * fname, const char * mode) {
#ifdef _WIN32
// temporarily change the locale to the system default to handle Unicode file names
std::string oldLocale = std::setlocale(LC_ALL, nullptr);
std::setlocale(LC_ALL, "");
struct llama_file {
// use FILE * so we don't have to re-open the file to mmap
FILE * fp;
size_t size;
// convert multi-byte string to wide-char string
int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
std::vector<wchar_t> wfname(wsize);
MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);
// determine the correct wide-character mode string
std::wstring wmode;
for(; *mode; ++mode) {
wmode += wchar_t(*mode);
}
fp = _wfopen(wfname.data(), wmode.c_str());
std::setlocale(LC_ALL, oldLocale.c_str());
#else
fp = fopen(fname, mode);
#endif
llama_file(const char * fname, const char * mode) {
fp = std::fopen(fname, mode);
if (fp == NULL) {
throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
}
@ -1011,7 +997,7 @@ llama_file::llama_file(const char * fname, const char * mode) {
seek(0, SEEK_SET);
}
size_t llama_file::tell() const {
size_t tell() const {
#ifdef _WIN32
__int64 ret = _ftelli64(fp);
#else
@ -1021,7 +1007,7 @@ size_t llama_file::tell() const {
return (size_t) ret;
}
void llama_file::seek(size_t offset, int whence) const {
void seek(size_t offset, int whence) const {
#ifdef _WIN32
int ret = _fseeki64(fp, (__int64) offset, whence);
#else
@ -1030,7 +1016,7 @@ void llama_file::seek(size_t offset, int whence) const {
GGML_ASSERT(ret == 0); // same
}
void llama_file::read_raw(void * ptr, size_t len) const {
void read_raw(void * ptr, size_t len) const {
if (len == 0) {
return;
}
@ -1044,26 +1030,13 @@ void llama_file::read_raw(void * ptr, size_t len) const {
}
}
uint32_t llama_file::read_u32() const {
uint32_t read_u32() const {
uint32_t ret;
read_raw(&ret, sizeof(ret));
return ret;
}
float_t llama_file::read_f32() const {
std::float_t ret;
read_raw(&ret, sizeof(ret));
return ret;
}
std::string llama_file::read_string(std::uint32_t len) const {
std::vector<char> chars(len);
read_raw(chars.data(), len);
return std::string(chars.data(), len);
}
void llama_file::write_raw(const void * ptr, size_t len) const {
void write_raw(const void * ptr, size_t len) const {
if (len == 0) {
return;
}
@ -1074,19 +1047,16 @@ void llama_file::write_raw(const void * ptr, size_t len) const {
}
}
void llama_file::write_u32(std::uint32_t val) const {
void write_u32(std::uint32_t val) const {
write_raw(&val, sizeof(val));
}
bool llama_file::eof() const {
return tell() >= size;
}
llama_file::~llama_file() {
~llama_file() {
if (fp) {
std::fclose(fp);
}
}
};
struct llama_mmap {
void * addr;

27
llama.h
View file

@ -950,33 +950,6 @@ extern "C" {
}
#endif
#ifdef __cplusplus
#include <cfloat>
#include <cstdio>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>
// Wrapper around a C stdio stream with binary read/write helpers.
// Only declarations appear here; the member functions are defined out of line.
struct llama_file {
    FILE *fp;     // underlying stdio stream
    size_t size;  // NOTE(review): presumably the file size in bytes, set by the constructor — confirm against its definition

    // Opens fname using the given fopen()-style mode string.
    llama_file(const char* fname, const char* mode);
    ~llama_file();

    // The type holds a raw FILE* and has a user-declared destructor, so an
    // implicit copy would leave two objects responsible for the same stream.
    // Copying is disabled to rule that out.
    llama_file(const llama_file&) = delete;
    llama_file& operator=(const llama_file&) = delete;

    // Current position in the stream.
    size_t tell() const;
    // Repositions the stream; whence takes the SEEK_* constants.
    void seek(size_t offset, int whence) const;

    // Reads len raw bytes into ptr.
    void read_raw(void* ptr, size_t len) const;
    // Typed read helpers.
    uint32_t read_u32() const;
    float_t read_f32() const;
    std::string read_string(std::uint32_t len) const;

    // Writes len raw bytes from ptr.
    void write_raw(const void* ptr, size_t len) const;
    void write_u32(std::uint32_t val) const;

    bool eof() const;
};
#endif
// Internal API to be implemented by llama.cpp and used by tests/benchmarks only
#ifdef LLAMA_API_INTERNAL