diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp
index 419b01ca5..044d63a67 100644
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -32,6 +32,7 @@ struct split_params {
     int n_split_tensors = 128;
     std::string input;
     std::string output;
+    bool dry_run = false;
 };
 
 static void split_print_usage(const char * executable) {
@@ -48,6 +49,7 @@ static void split_print_usage(const char * executable) {
     printf("  --merge                 merge multiple GGUF to a single GGUF\n");
     printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
     printf("  --split-max-size N(M|G) max size per split\n");
+    printf("  --dry-run               only print out a split plan and exit, without writing any new files\n");
     printf("\n");
 }
 
@@ -91,6 +93,10 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
             fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET);
             exit(0);
         }
+        if (arg == "--dry-run") {
+            arg_found = true;
+            params.dry_run = true;
+        }
 
         if (is_op_set) {
             throw std::invalid_argument("error: either --split or --merge can be specified, but not both");
@@ -214,8 +220,7 @@ struct split_strategy {
         for (int i = 0; i < n_tensors; ++i) {
            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
             // calculate the "imaginary" size including this tensor
-            size_t n_bytes = ggml_nbytes(t);
-            //n_bytes += GGML_PAD(n_bytes, GGUF_DEFAULT_ALIGNMENT);
+            size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
             size_t next_tensors_size = curr_tensors_size + n_bytes;
             size_t next_metadata_size = gguf_get_meta_size(ctx_out) + GGUF_DEFAULT_ALIGNMENT
@@ -314,7 +319,7 @@ struct split_strategy {
     }
 
     void copy_file_to_file(std::ifstream & f_in, std::ofstream & f_out, const size_t in_offset, const size_t len) {
-        // TODO: prevent copying buffer to user space then write it back
+        // TODO: detect OS and use copy_file_range() here for better performance
         if (read_buf.size() < len) {
             read_buf.resize(len);
         }
@@ -349,8 +354,10 @@ static void gguf_split(const split_params & split_params) {
     int n_split = strategy.ctx_outs.size();
     strategy.print_info();
 
-    // write all output splits
-    strategy.write();
+    if (!split_params.dry_run) {
+        // write all output splits
+        strategy.write();
+    }
 
     // done, clean up
     gguf_free(ctx_gguf);
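
For context, the padding change means each tensor's contribution to a split's running size is rounded up to the GGUF alignment boundary before it is compared against the split limit, which can move split boundaries compared to using raw `ggml_nbytes()`. Below is a small standalone C++ sketch, not the gguf-split code itself: the `kAlign` constant, `pad()`, `plan_splits()`, and the example sizes are illustrative assumptions, with `kAlign` standing in for `GGUF_DEFAULT_ALIGNMENT` (32 in ggml).

```cpp
// Standalone illustration of alignment-aware split planning (hypothetical helper,
// not the gguf-split implementation).
#include <cstddef>
#include <cstdio>
#include <vector>

static constexpr size_t kAlign = 32; // stand-in for GGUF_DEFAULT_ALIGNMENT

// Round n up to the next multiple of kAlign (same rounding GGML_PAD performs
// for a power-of-two alignment).
static size_t pad(size_t n) {
    return (n + kAlign - 1) & ~(kAlign - 1);
}

// Group tensor sizes into splits without exceeding max_split_bytes,
// counting each tensor at its padded (aligned) size.
static std::vector<std::vector<size_t>> plan_splits(const std::vector<size_t> & tensor_bytes,
                                                    size_t max_split_bytes) {
    std::vector<std::vector<size_t>> splits(1);
    size_t curr = 0;
    for (size_t n : tensor_bytes) {
        const size_t padded = pad(n);
        if (!splits.back().empty() && curr + padded > max_split_bytes) {
            splits.emplace_back(); // start a new split
            curr = 0;
        }
        splits.back().push_back(n);
        curr += padded;
    }
    return splits;
}

int main() {
    // 100-byte tensors pad to 128 bytes, so only 3 fit under a 500-byte limit
    // (counting unpadded sizes, 5 would have fit).
    std::vector<size_t> tensors(6, 100);
    const auto splits = plan_splits(tensors, 500);
    for (size_t i = 0; i < splits.size(); ++i) {
        printf("split %zu: %zu tensors\n", i, splits[i].size());
    }
    return 0;
}
```

This mirrors why the patch pads before accumulating: the on-disk split really does occupy aligned space per tensor, so planning with unpadded sizes can overshoot the requested `--split-max-size`.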