add dry run option

This commit is contained in:
ngxson 2024-03-27 15:25:24 +01:00
parent 583022c5c7
commit 8569ba30c3

View file

@@ -32,6 +32,7 @@ struct split_params {
int n_split_tensors = 128; int n_split_tensors = 128;
std::string input; std::string input;
std::string output; std::string output;
bool dry_run = false;
}; };
static void split_print_usage(const char * executable) { static void split_print_usage(const char * executable) {
@@ -48,6 +49,7 @@ static void split_print_usage(const char * executable) {
printf(" --merge merge multiple GGUF to a single GGUF\n"); printf(" --merge merge multiple GGUF to a single GGUF\n");
printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors); printf(" --split-max-tensors max tensors in each split (default: %d)\n", default_params.n_split_tensors);
printf(" --split-max-size N(M|G) max size per split\n"); printf(" --split-max-size N(M|G) max size per split\n");
printf(" --dry-run only print out a split plan and exit, without writing any new files\n");
printf("\n"); printf("\n");
} }
@@ -91,6 +93,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
exit(0); exit(0);
} }
if (arg == "--dry-run") {
arg_found = true;
params.dry_run = true;
}
if (is_op_set) { if (is_op_set) {
throw std::invalid_argument("error: either --split or --merge can be specified, but not both"); throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -214,8 +220,7 @@ struct split_strategy {
for (int i = 0; i < n_tensors; ++i) { for (int i = 0; i < n_tensors; ++i) {
struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i)); struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
// calculate the "imaginary" size including this tensor // calculate the "imaginary" size including this tensor
size_t n_bytes = ggml_nbytes(t); size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
//n_bytes += GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT);
size_t next_tensors_size = curr_tensors_size + n_bytes; size_t next_tensors_size = curr_tensors_size + n_bytes;
size_t next_metadata_size = gguf_get_meta_size(ctx_out) size_t next_metadata_size = gguf_get_meta_size(ctx_out)
+ GGUF_DEFAULT_ALIGNMENT + GGUF_DEFAULT_ALIGNMENT
@@ -314,7 +319,7 @@ struct split_strategy {
} }
void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) { void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
// TODO: prevent copying buffer to user space then write it back // TODO: detect OS and use copy_file_range() here for better performance
if (read_buf.size() < len) { if (read_buf.size() < len) {
read_buf.resize(len); read_buf.resize(len);
} }
@@ -349,8 +354,10 @@ static void gguf_split(const split_params & split_params) {
int n_split = strategy.ctx_outs.size(); int n_split = strategy.ctx_outs.size();
strategy.print_info(); strategy.print_info();
// write all output splits if (!split_params.dry_run) {
strategy.write(); // write all output splits
strategy.write();
}
// done, clean up // done, clean up
gguf_free(ctx_gguf); gguf_free(ctx_gguf);