add customized split functionality: define a tensor name set and split by that set

zhhan 2024-07-09 11:13:17 -07:00
parent 0ab112abdb
commit 55fbe831ef
3 changed files with 82 additions and 14 deletions
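
A sketch of the intended usage; the file name tensors.txt and the model paths are placeholders, and gguf-split's existing positional GGUF_IN GGUF_OUT arguments are assumed:

    # put every tensor whose name matches an entry of tensors.txt into the
    # first shard, and all remaining tensors into the following shard
    ./gguf-split --split --tensor-set tensors.txt input.gguf output

    # tensors.txt contains one module name per line, matched by substring:
    ffn_up.weight
    ffn_down.weight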


@@ -32,8 +32,10 @@ struct split_params {
     int n_split_tensors = 128;
     std::string input;
     std::string output;
+    std::string tensor_set_file;
     bool no_tensor_first_split = false;
     bool dry_run = false;
+    bool customized_split = false;
 };
 
 static void split_print_usage(const char * executable) {
@@ -47,6 +49,7 @@ static void split_print_usage(const char * executable) {
     printf("  -h, --help              show this help message and exit\n");
    printf("  --version               show version and build info\n");
    printf("  --split                 split GGUF to multiple GGUF (enabled by default)\n");
+   printf("  --tensor-set            customize the tensor set used to split; the file contains module names, one per line, e.g. 'ffn_up.weight'\n");
    printf("  --merge                 merge multiple GGUF to a single GGUF\n");
    printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
    printf("  --split-max-size N(M|G) max size per split\n");
@@ -121,6 +124,16 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
             params.operation = SPLIT_OP_SPLIT;
         }
+
+        if (arg == "--tensor-set") {
+            arg_found = true;
+            if (++arg_idx >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.tensor_set_file = argv[arg_idx];
+            params.customized_split = true;
+        }
 
         if (is_mode_set) {
             throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
         }
@@ -180,12 +193,27 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
+static std::vector<std::string> read_customized_tensors(const std::string & tensor_set_file) {
+    std::vector<std::string> tensor_set;
+    std::ifstream f_tensor_set(tensor_set_file);
+    if (!f_tensor_set.is_open()) {
+        fprintf(stderr, "error: failed to open tensor set file %s\n", tensor_set_file.c_str());
+        exit(EXIT_FAILURE);
+    }
+    std::string line;
+    while (std::getline(f_tensor_set, line)) {
+        tensor_set.push_back(line);
+    }
+    return tensor_set;
+}
+
 struct split_strategy {
     const split_params params;
     std::ifstream & f_input;
     struct gguf_context * ctx_gguf;
     struct ggml_context * ctx_meta = NULL;
     const int n_tensors;
+    std::string tensor_set_file;
 
     // one ctx_out per one output file
     std::vector<struct gguf_context *> ctx_outs;
@@ -233,20 +261,45 @@ struct split_strategy {
             new_ctx_out(true);
         }
 
-        // process tensors one by one
-        size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
-        for (int i = 0; i < n_tensors; ++i) {
-            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-            // calculate the "imaginary" size = the current size + next tensor size
-            size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
-            size_t next_tensors_size = curr_tensors_size + n_bytes;
-            if (should_split(i, next_tensors_size)) {
-                new_ctx_out(false);
-                curr_tensors_size = n_bytes;
-            } else {
-                curr_tensors_size = next_tensors_size;
-            }
-            gguf_add_tensor(ctx_out, t);
-        }
+        if (!params.customized_split) {
+            // process tensors one by one
+            size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
+            for (int i = 0; i < n_tensors; ++i) {
+                struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+                // calculate the "imaginary" size = the current size + next tensor size
+                size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
+                size_t next_tensors_size = curr_tensors_size + n_bytes;
+                if (should_split(i, next_tensors_size)) {
+                    new_ctx_out(false);
+                    curr_tensors_size = n_bytes;
+                } else {
+                    curr_tensors_size = next_tensors_size;
+                }
+                gguf_add_tensor(ctx_out, t);
+            }
+        } else {
+            // custom split based on the tensor set: matching tensors go into the first split
+            std::vector<std::string> tensor_set = read_customized_tensors(params.tensor_set_file);
+            if (tensor_set.empty()) {
+                fprintf(stderr, "error: tensor set is empty\n");
+                exit(EXIT_FAILURE);
+            }
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+            new_ctx_out(false);
+            // add the remaining tensors to the next split
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (!is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+        }
 
         // push the last ctx_out
@@ -274,6 +327,16 @@ struct split_strategy {
         }
     }
 
+    bool is_tensor_in_customized_set(const char * t_name, const std::vector<std::string> & tensor_set) {
+        for (const auto & s : tensor_set) {
+            if (strstr(t_name, s.c_str()) != NULL) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     void print_info() {
         printf("n_split: %ld\n", ctx_outs.size());
         int i_split = 0;
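
The matching in is_tensor_in_customized_set is substring-based (strstr), so a single entry such as 'ffn_up.weight' selects that module in every layer. One caveat: a blank line in the tensor set file would match every tensor, because strstr() with an empty needle always succeeds. A minimal standalone sketch of these semantics; the tensor names are typical GGUF names used only for illustration:

// sketch of the substring matching performed by is_tensor_in_customized_set()
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

static bool in_set(const char * t_name, const std::vector<std::string> & tensor_set) {
    for (const auto & s : tensor_set) {
        if (strstr(t_name, s.c_str()) != NULL) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::vector<std::string> tensor_set = { "ffn_up.weight" };
    printf("%d\n", in_set("blk.0.ffn_up.weight", tensor_set)); // 1: substring hit, any layer
    printf("%d\n", in_set("blk.0.attn_q.weight", tensor_set)); // 0: not in the set
    printf("%d\n", in_set("blk.0.attn_q.weight", { "" }));     // 1: an empty entry matches everything
    return 0;
}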


@@ -11,6 +11,11 @@
 #include <map>
 #include <string>
 
+#ifdef _WIN32
+#else
+#include <sys/stat.h>
+#endif // _WIN32
+
 #ifdef LLAMA_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef LLAMA_BUILD


@@ -18022,7 +18022,7 @@ bool llama_switch_derived_model(struct llama_context* ctx, const std::string der
     auto& cparams = ctx->cparams;
     cparams.derived_model_name = (ctx->derived_models.find(derived_model_name) == ctx->derived_models.end()) ? BASE_MODEL : derived_model_name;
-    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name);
+    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name.c_str());
     return true;
 }
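
The last hunk fixes a genuine bug rather than a style issue: LLAMA_LOG_INFO forwards its arguments to a C-style formatter, and passing a std::string through C varargs for a %s conversion is not valid, since %s expects a NUL-terminated const char *. A minimal illustration, with printf standing in for the logging macro and an assumed model name:

#include <cstdio>
#include <string>

int main() {
    std::string derived_model_name = "base_model"; // assumed value for illustration
    // printf("%s\n", derived_model_name);         // broken: %s needs a const char *
    printf("%s\n", derived_model_name.c_str());    // fixed: pass the underlying C string
    return 0;
}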