split : minor style + fix compile warnings

This commit is contained in:
Georgi Gerganov 2024-03-19 11:17:29 +02:00
parent b3a94dd9e0
commit 7f0e73b27a
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@ -18,9 +18,9 @@
#include <unistd.h> #include <unistd.h>
enum split_operation : uint8_t { enum split_operation : uint8_t {
SPLIT_OP_SPLIT, SPLIT_OP_SPLIT,
SPLIT_OP_MERGE, SPLIT_OP_MERGE,
SPLIT_OP_UPLOAD, SPLIT_OP_UPLOAD,
}; };
static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "general.split"; static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = "general.split";
@ -31,10 +31,10 @@ static const int SPLIT_FILENAME_MAX = 256;
static const char * const SPLIT_FILENAME_FORMAT = "%s-%05d-of-%05d.gguf"; static const char * const SPLIT_FILENAME_FORMAT = "%s-%05d-of-%05d.gguf";
struct split_params { struct split_params {
split_operation operation = SPLIT_OP_SPLIT; split_operation operation = SPLIT_OP_SPLIT;
int n_split_tensors = 128; int n_split_tensors = 128;
std::string input; std::string input;
std::string output; std::string output;
}; };
static void split_print_usage(const char * executable) { static void split_print_usage(const char * executable) {
@ -149,104 +149,102 @@ static std::string split_file_name(const std::string & path, int i_split, int n_
} }
struct split_strategy { struct split_strategy {
const split_params params;
std::ifstream & f_input;
struct gguf_context * ctx_gguf;
struct ggml_context * ctx_meta = NULL;
const int n_tensors;
const split_params params; const int n_split;
std::ifstream & f_input; int i_split = 0;
struct gguf_context * ctx_gguf;
struct ggml_context * ctx_meta = NULL;
const int n_tensors;
const int n_split; int i_tensor = 0;
int i_split = 0;
int i_tensor = 0; std::vector<uint8_t> read_data;
std::vector<uint8_t> read_data; struct gguf_context * ctx_out;
std::ofstream fout;
struct gguf_context * ctx_out; split_strategy(const split_params & params,
std::ofstream fout; std::ifstream & f_input,
struct gguf_context * ctx_gguf,
struct ggml_context * ctx_meta) :
params(params),
f_input(f_input),
ctx_gguf(ctx_gguf),
ctx_meta(ctx_meta),
n_tensors(gguf_get_n_tensors(ctx_gguf)),
n_split(std::ceil(1. * n_tensors / params.n_split_tensors)) {
}
split_strategy(const split_params & params, bool should_split() const {
std::ifstream & f_input, return i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0;
struct gguf_context * ctx_gguf, }
struct ggml_context * ctx_meta) :
params(params),
f_input(f_input),
ctx_gguf(ctx_gguf),
ctx_meta(ctx_meta),
n_tensors(gguf_get_n_tensors(ctx_gguf)),
n_split(std::ceil(1. * n_tensors / params.n_split_tensors)) {
}
bool should_split() const { void split_start() {
return i_tensor < n_tensors && i_tensor % params.n_split_tensors == 0; ctx_out = gguf_init_empty();
}
void split_start() { // Save all metadata in first split only
ctx_out = gguf_init_empty(); if (i_split == 0) {
gguf_set_kv(ctx_out, ctx_gguf);
}
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
// Save all metadata in first split only // populate the original tensors, so we get an initial metadata
if (i_split == 0) { for (int i = i_split * params.n_split_tensors; i < n_tensors && i < (i_split + 1) * params.n_split_tensors; ++i) {
gguf_set_kv(ctx_out, ctx_gguf); struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
} gguf_add_tensor(ctx_out, meta);
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split); }
gguf_set_val_u8(ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
// populate the original tensors, so we get an initial metadata auto split_name = split_file_name(params.output, i_split, n_split);
for (int i = i_split * params.n_split_tensors; i < n_tensors
&& i < (i_split + 1) * params.n_split_tensors; ++i) {
struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
gguf_add_tensor(ctx_out, meta);
}
auto split_name = split_file_name(params.output, i_split, n_split); fprintf(stderr, "%s: %s ...", __func__, split_name.c_str());
fout = std::ofstream(split_name, std::ios::binary);
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
fprintf(stderr, "%s: %s ...", __func__, split_name.c_str()); auto meta_size = gguf_get_meta_size(ctx_out);
fout = std::ofstream(split_name, std::ios::binary);
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
auto meta_size = gguf_get_meta_size(ctx_out); // placeholder for the meta data
::zeros(fout, meta_size);
// placeholder for the meta data i_split++;
::zeros(fout, meta_size); }
i_split++; void next_tensor() {
} const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor);
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
auto n_bytes = ggml_nbytes(t);
void next_tensor() { if (read_data.size() < n_bytes) {
const char * t_name = gguf_get_tensor_name(ctx_gguf, i_tensor); read_data.resize(n_bytes);
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name); }
auto n_bytes = ggml_nbytes(t);
if (read_data.size() < n_bytes) { auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor);
read_data.resize(n_bytes); f_input.seekg(offset);
} f_input.read((char *)read_data.data(), n_bytes);
auto offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i_tensor); t->data = read_data.data();
f_input.seekg(offset);
f_input.read((char *)read_data.data(), n_bytes);
t->data = read_data.data(); // write tensor data + padding
fout.write((const char *)t->data, n_bytes);
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
// write tensor data + padding i_tensor++;
fout.write((const char *)t->data, n_bytes); }
zeros(fout, GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT) - n_bytes);
i_tensor++; void split_end() {
} // go back to beginning of file and write the updated metadata
fout.seekp(0);
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
gguf_get_meta_data(ctx_out, data.data());
fout.write((const char *)data.data(), data.size());
void split_end() { fout.close();
// go back to beginning of file and write the updated metadata gguf_free(ctx_out);
fout.seekp(0);
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
gguf_get_meta_data(ctx_out, data.data());
fout.write((const char *)data.data(), data.size());
fout.close(); fprintf(stderr, "\033[3Ddone\n");
gguf_free(ctx_out); }
fprintf(stderr, "\033[3Ddone\n");
}
}; };
static void gguf_split(const split_params & split_params) { static void gguf_split(const split_params & split_params) {
@ -481,10 +479,8 @@ static void gguf_merge(const split_params & split_params) {
static void gguf_upload(const split_params & /*params*/) { static void gguf_upload(const split_params & /*params*/) {
#ifdef LLAMA_USE_CURL #ifdef LLAMA_USE_CURL
fprintf(stderr, "%s: NOT IMPLEMENTED\n", __func__); fprintf(stderr, "%s: NOT IMPLEMENTED\n", __func__);
exit(-1);
#else #else
fprintf(stderr, "%s: operation upload not supported, please build with -DLLAMA_CURL\n", __func__); fprintf(stderr, "%s: operation upload not supported, please build with -DLLAMA_CURL\n", __func__);
exit(1);
#endif // LLAMA_USE_CURL #endif // LLAMA_USE_CURL
} }
@ -497,14 +493,15 @@ int main(int argc, const char ** argv) {
split_params_parse(argc, argv, params); split_params_parse(argc, argv, params);
switch (params.operation) { switch (params.operation) {
case SPLIT_OP_SPLIT:gguf_split(params); case SPLIT_OP_SPLIT: gguf_split(params);
break; break;
case SPLIT_OP_MERGE:gguf_merge(params); case SPLIT_OP_MERGE: gguf_merge(params);
break; break;
case SPLIT_OP_UPLOAD:gguf_upload(params); case SPLIT_OP_UPLOAD: gguf_upload(params);
break; break;
default:split_print_usage(argv[0]); default:split_print_usage(argv[0]);
exit(1); exit(1);
} }
exit(0);
return 0;
} }