add customized split functionality: define a tensor name set and split by that set

zhhan 2024-07-09 11:13:17 -07:00
parent 0ab112abdb
commit 55fbe831ef
3 changed files with 82 additions and 14 deletions
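
A sketch of the intended usage; the file name tensors.txt and the model paths are placeholders, and gguf-split's existing positional GGUF_IN GGUF_OUT arguments are assumed:

    # put every tensor whose name matches an entry of tensors.txt into the
    # first shard, and all remaining tensors into the following shard
    ./gguf-split --split --tensor-set tensors.txt input.gguf output

    # tensors.txt contains one module name per line, matched by substring:
    ffn_up.weight
    ffn_down.weight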


@@ -32,8 +32,10 @@ struct split_params {
     int n_split_tensors = 128;
     std::string input;
     std::string output;
+    std::string tensor_set_file;
     bool no_tensor_first_split = false;
     bool dry_run = false;
+    bool customized_split = false;
 };
 
 static void split_print_usage(const char * executable) {
@@ -47,6 +49,7 @@ static void split_print_usage(const char * executable) {
     printf("  -h, --help              show this help message and exit\n");
    printf("  --version               show version and build info\n");
    printf("  --split                 split GGUF to multiple GGUF (enabled by default)\n");
+   printf("  --tensor-set            customize the tensor set used to split; the file contains module names, one per line, e.g. 'ffn_up.weight'\n");
    printf("  --merge                 merge multiple GGUF to a single GGUF\n");
    printf("  --split-max-tensors     max tensors in each split (default: %d)\n", default_params.n_split_tensors);
    printf("  --split-max-size N(M|G) max size per split\n");
@@ -121,6 +124,16 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
             params.operation = SPLIT_OP_SPLIT;
         }
+
+        if (arg == "--tensor-set") {
+            arg_found = true;
+            if (++arg_idx >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.tensor_set_file = argv[arg_idx];
+            params.customized_split = true;
+        }
 
         if (is_mode_set) {
             throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
         }
@@ -180,12 +193,27 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
+static std::vector<std::string> read_customized_tensors(const std::string & tensor_set_file) {
+    std::vector<std::string> tensor_set;
+    std::ifstream f_tensor_set(tensor_set_file);
+    if (!f_tensor_set.is_open()) {
+        fprintf(stderr, "error: failed to open tensor set file %s\n", tensor_set_file.c_str());
+        exit(EXIT_FAILURE);
+    }
+    std::string line;
+    while (std::getline(f_tensor_set, line)) {
+        tensor_set.push_back(line);
+    }
+    return tensor_set;
+}
+
 struct split_strategy {
     const split_params params;
     std::ifstream & f_input;
     struct gguf_context * ctx_gguf;
     struct ggml_context * ctx_meta = NULL;
     const int n_tensors;
+    std::string tensor_set_file;
 
     // one ctx_out per one output file
     std::vector<struct gguf_context *> ctx_outs;
@@ -233,20 +261,45 @@ struct split_strategy {
             new_ctx_out(true);
         }
 
-        // process tensors one by one
-        size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
-        for (int i = 0; i < n_tensors; ++i) {
-            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-            // calculate the "imaginary" size = the current size + next tensor size
-            size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
-            size_t next_tensors_size = curr_tensors_size + n_bytes;
-            if (should_split(i, next_tensors_size)) {
-                new_ctx_out(false);
-                curr_tensors_size = n_bytes;
-            } else {
-                curr_tensors_size = next_tensors_size;
-            }
-            gguf_add_tensor(ctx_out, t);
-        }
+        if (!params.customized_split) {
+            // process tensors one by one
+            size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
+            for (int i = 0; i < n_tensors; ++i) {
+                struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+                // calculate the "imaginary" size = the current size + next tensor size
+                size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
+                size_t next_tensors_size = curr_tensors_size + n_bytes;
+                if (should_split(i, next_tensors_size)) {
+                    new_ctx_out(false);
+                    curr_tensors_size = n_bytes;
+                } else {
+                    curr_tensors_size = next_tensors_size;
+                }
+                gguf_add_tensor(ctx_out, t);
+            }
+        } else {
+            // custom split based on the tensor set: matching tensors go into the first split
+            std::vector<std::string> tensor_set = read_customized_tensors(params.tensor_set_file);
+            if (tensor_set.empty()) {
+                fprintf(stderr, "error: tensor set is empty\n");
+                exit(EXIT_FAILURE);
+            }
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+            new_ctx_out(false);
+            // add the remaining tensors to the next split
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (!is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+        }
 
         // push the last ctx_out
@@ -274,6 +327,16 @@ struct split_strategy {
         }
     }
 
+    bool is_tensor_in_customized_set(const char * t_name, const std::vector<std::string> & tensor_set) {
+        for (const auto & s : tensor_set) {
+            if (strstr(t_name, s.c_str()) != NULL) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     void print_info() {
         printf("n_split: %ld\n", ctx_outs.size());
         int i_split = 0;
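
The matching in is_tensor_in_customized_set is substring-based (strstr), so a single entry such as 'ffn_up.weight' selects that module in every layer. One caveat: a blank line in the tensor set file would match every tensor, because strstr() with an empty needle always succeeds. A minimal standalone sketch of these semantics; the tensor names are typical GGUF names used only for illustration:

// sketch of the substring matching performed by is_tensor_in_customized_set()
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

static bool in_set(const char * t_name, const std::vector<std::string> & tensor_set) {
    for (const auto & s : tensor_set) {
        if (strstr(t_name, s.c_str()) != NULL) {
            return true;
        }
    }
    return false;
}

int main() {
    const std::vector<std::string> tensor_set = { "ffn_up.weight" };
    printf("%d\n", in_set("blk.0.ffn_up.weight", tensor_set)); // 1: substring hit, any layer
    printf("%d\n", in_set("blk.0.attn_q.weight", tensor_set)); // 0: not in the set
    printf("%d\n", in_set("blk.0.attn_q.weight", { "" }));     // 1: an empty entry matches everything
    return 0;
}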


@@ -11,6 +11,11 @@
 #include <map>
 #include <string>
 
+#ifdef _WIN32
+#else
+#include <sys/stat.h>
+#endif // _WIN32
+
 #ifdef LLAMA_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef LLAMA_BUILD


@@ -18022,7 +18022,7 @@ bool llama_switch_derived_model(struct llama_context* ctx, const std::string der
     auto& cparams = ctx->cparams;
     cparams.derived_model_name = (ctx->derived_models.find(derived_model_name) == ctx->derived_models.end()) ? BASE_MODEL : derived_model_name;
-    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name);
+    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name.c_str());
     return true;
 }
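
The last hunk fixes a genuine bug rather than a style issue: LLAMA_LOG_INFO forwards its arguments to a C-style formatter, and passing a std::string through C varargs for a %s conversion is not valid, since %s expects a NUL-terminated const char *. A minimal illustration, with printf standing in for the logging macro and an assumed model name:

#include <cstdio>
#include <string>

int main() {
    std::string derived_model_name = "base_model"; // assumed value for illustration
    // printf("%s\n", derived_model_name);         // broken: %s needs a const char *
    printf("%s\n", derived_model_name.c_str());    // fixed: pass the underlying C string
    return 0;
}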