add dry run option

This commit is contained in:
ngxson 2024-03-27 15:25:24 +01:00
parent 583022c5c7
commit 8569ba30c3

View file

@@ -32,6 +32,7 @@ struct split_params {
int n_split_tensors = 128; int n_split_tensors = 128;
std::string input; std::string input;
std::string output; std::string output;
bool dry_run = false;
}; };
static void split_print_usage(const char * executable) { static void split_print_usage(const char * executable) {
@@ -48,6 +49,7 @@ static void split_print_usage(const char * executable) {
printf(" --merge merge multiple GGUF to a single GGUF\n"); printf(" --merge merge multiple GGUF to a single GGUF\n");
printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors); printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
printf(" --split-max-size N(M|G) max size per split\n"); printf(" --split-max-size N(M|G) max size per split\n");
printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
printf("\n"); printf("\n");
} }
@@ -91,6 +93,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
exit(0); exit(0);
} }
if (arg == "--dry-run") {
arg_found = true;
params.dry_run = true;
}
if (is_op_set) { if (is_op_set) {
throw std::invalid_argument("error: either --split or --merge can be specified, but not both"); throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -214,8 +220,7 @@ struct split_strategy {
for (int i = 0; i < n_tensors; ++i) { for (int i = 0; i < n_tensors; ++i) {
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i)); struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
// calculate the "imaginary" size including this tensor // calculate the "imaginary" size including this tensor
size_t n_bytes = ggml_nbytes(t); size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
//n_bytes += GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT);
size_t next_tensors_size = curr_tensors_size + n_bytes; size_t next_tensors_size = curr_tensors_size + n_bytes;
size_t next_metadata_size = gguf_get_meta_size(ctx_out) size_t next_metadata_size = gguf_get_meta_size(ctx_out)
+ GGUF_DEFAULT_ALIGNMENT + GGUF_DEFAULT_ALIGNMENT
@@ -314,7 +319,7 @@ struct split_strategy {
} }
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) { void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
// TODO: prevent copying buffer to user space then write it back // TODO: detect OS and use copy_file_range() here for better performance
if (read_buf.size() < len) { if (read_buf.size() < len) {
read_buf.resize(len); read_buf.resize(len);
} }
@@ -349,8 +354,10 @@ static void gguf_split(const split_params & split_params) {
int n_split = strategy.ctx_outs.size(); int n_split = strategy.ctx_outs.size();
strategy.print_info(); strategy.print_info();
// write all output splits if (!split_params.dry_run) {
strategy.write(); // write all output splits
strategy.write();
}
// done, clean up // done, clean up
gguf_free(ctx_gguf); gguf_free(ctx_gguf);