add customized split functionality, define tensor names set and split by name set

parent 0ab112abdb
commit 55fbe831ef

3 changed files with 82 additions and 14 deletions
@@ -32,8 +32,10 @@ struct split_params {
     int n_split_tensors = 128;
     std::string input;
     std::string output;
+    std::string tensor_set_file;
     bool no_tensor_first_split = false;
     bool dry_run = false;
+    bool customized_split = false;
 };
 
 static void split_print_usage(const char * executable) {
@@ -47,6 +49,7 @@ static void split_print_usage(const char * executable) {
     printf("  -h, --help                show this help message and exit\n");
     printf("  --version                 show version and build info\n");
     printf("  --split                   split GGUF to multiple GGUF (enabled by default)\n");
+    printf("  --tensor-set FILE         customize the tensor set used to split; FILE lists module names, e.g. 'ffn_up.weight'\n");
     printf("  --merge                   merge multiple GGUF to a single GGUF\n");
     printf("  --split-max-tensors       max tensors in each split (default: %d)\n", default_params.n_split_tensors);
     printf("  --split-max-size N(M|G)   max size per split\n");
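(Note: the added usage line originally lacked a trailing \n; it is included above. For reference, the tensor-set file is plain text with one name fragment per line. Assuming this fork keeps gguf-split's usual GGUF_IN GGUF_OUT positional arguments, and using an invented file name, a run could look like:

    $ cat tensors.txt
    ffn_up.weight
    ffn_down.weight
    $ ./gguf-split --split --tensor-set tensors.txt model.gguf model-out

Each entry is matched as a substring of the tensor name — see is_tensor_in_customized_set below — so 'ffn_up.weight' selects that module across all layers.)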
@@ -121,6 +124,16 @@ static void split_params_parse_ex(int argc, const char ** argv, split_params & p
         params.operation = SPLIT_OP_SPLIT;
     }
 
+    if (arg == "--tensor-set") {
+        arg_found = true;
+        if (++arg_idx >= argc) {
+            invalid_param = true;
+            break;
+        }
+        params.tensor_set_file = argv[arg_idx];
+        params.customized_split = true;
+    }
+
     if (is_mode_set) {
         throw std::invalid_argument("error: either --split-max-tensors or --split-max-size can be specified, but not both");
    }
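(Note: --tensor-set both records the file path and flips customized_split, so passing the flag implicitly selects the custom strategy. It is not folded into the is_mode_set exclusivity check guarding --split-max-tensors and --split-max-size, so combining it with either appears to be allowed; the size/count limits are simply ignored in custom mode, as the split loop below shows.)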
@@ -180,12 +193,27 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
+static std::vector<std::string> read_customized_tensors(const std::string & tensor_set_file) {
+    std::vector<std::string> tensor_set;
+    std::ifstream f_tensor_set(tensor_set_file);
+    if (!f_tensor_set.is_open()) {
+        fprintf(stderr, "error: failed to open tensor set file %s\n", tensor_set_file.c_str());
+        exit(EXIT_FAILURE);
+    }
+    std::string line;
+    while (std::getline(f_tensor_set, line)) {
+        tensor_set.push_back(line);
+    }
+    return tensor_set;
+}
+
 struct split_strategy {
     const split_params params;
     std::ifstream & f_input;
     struct gguf_context * ctx_gguf;
     struct ggml_context * ctx_meta = NULL;
     const int n_tensors;
+    std::string tensor_set_file;
 
     // one ctx_out per one output file
     std::vector<struct gguf_context *> ctx_outs;
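(read_customized_tensors stores each line verbatim, including trailing whitespace. One caveat: a set file saved with Windows line endings leaves a trailing '\r' on every entry, and since matching later requires the entry to occur inside the tensor name, nothing would match. A minimal hardening sketch for the read loop — not part of the commit, shown only as a suggestion:

    // hypothetical hardening: strip trailing CR/whitespace and skip blank lines
    while (std::getline(f_tensor_set, line)) {
        while (!line.empty() && (line.back() == '\r' || line.back() == ' ' || line.back() == '\t')) {
            line.pop_back();
        }
        if (!line.empty()) {
            tensor_set.push_back(line);
        }
    }
)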
@@ -233,20 +261,45 @@ struct split_strategy {
             new_ctx_out(true);
         }
 
-        // process tensors one by one
-        size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
-        for (int i = 0; i < n_tensors; ++i) {
-            struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
-            // calculate the "imaginary" size = the current size + next tensor size
-            size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
-            size_t next_tensors_size = curr_tensors_size + n_bytes;
-            if (should_split(i, next_tensors_size)) {
-                new_ctx_out(false);
-                curr_tensors_size = n_bytes;
-            } else {
-                curr_tensors_size = next_tensors_size;
-            }
-            gguf_add_tensor(ctx_out, t);
-        }
+        if (!params.customized_split) {
+            // process tensors one by one
+            size_t curr_tensors_size = 0; // current size by counting only tensors size (without metadata)
+            for (int i = 0; i < n_tensors; ++i) {
+                struct ggml_tensor * t = ggml_get_tensor(ctx_meta, gguf_get_tensor_name(ctx_gguf, i));
+                // calculate the "imaginary" size = the current size + next tensor size
+                size_t n_bytes = GGML_PAD(ggml_nbytes(t), GGUF_DEFAULT_ALIGNMENT);
+                size_t next_tensors_size = curr_tensors_size + n_bytes;
+                if (should_split(i, next_tensors_size)) {
+                    new_ctx_out(false);
+                    curr_tensors_size = n_bytes;
+                } else {
+                    curr_tensors_size = next_tensors_size;
+                }
+                gguf_add_tensor(ctx_out, t);
+            }
+        } else {
+            // custom split based on tensor set
+            std::vector<std::string> tensor_set = read_customized_tensors(params.tensor_set_file);
+            if (tensor_set.empty()) {
+                fprintf(stderr, "error: tensor set is empty\n");
+                exit(EXIT_FAILURE);
+            }
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+            new_ctx_out(false);
+            // add the remaining tensors to the next split
+            for (int i = 0; i < n_tensors; ++i) {
+                const char * t_name = gguf_get_tensor_name(ctx_gguf, i);
+                if (!is_tensor_in_customized_set(t_name, tensor_set)) {
+                    struct ggml_tensor * t = ggml_get_tensor(ctx_meta, t_name);
+                    gguf_add_tensor(ctx_out, t);
+                }
+            }
+        }
 
         // push the last ctx_out
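(Net effect of the custom branch: two data shards, produced in source order — plus the optional tensor-less first split handled earlier by new_ctx_out(true). The first pass streams every tensor whose name matches an entry in the set into the current output context; new_ctx_out(false) then closes that shard and opens a second one, which receives all remaining tensors. The size- and count-based limits enforced by should_split in the default branch are bypassed entirely here.)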
@@ -274,6 +327,16 @@ struct split_strategy {
         }
     }
 
+    bool is_tensor_in_customized_set(const char * t_name, std::vector<std::string> tensor_set) {
+        for (auto & s : tensor_set) {
+            if (strstr(t_name, s.c_str()) != NULL) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
     void print_info() {
         printf("n_split: %ld\n", ctx_outs.size());
         int i_split = 0;
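(Because matching uses strstr, each entry is a substring pattern: 'ffn_up.weight' matches 'blk.0.ffn_up.weight', 'blk.1.ffn_up.weight', and so on, which is what makes per-module selection work. Two side notes: the vector parameter is taken by value, so it is copied on every call — a const reference would avoid that — and substring matching cannot express an exact name. If exact names were ever needed, a hypothetical alternative, not part of the commit, might look like:

    #include <unordered_set>
    #include <string>

    // hypothetical alternative: exact-name lookup instead of substring matching
    static bool is_tensor_exact_match(const char * t_name, const std::unordered_set<std::string> & names) {
        return names.find(t_name) != names.end();
    }
)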
@@ -11,6 +11,11 @@
 #include <map>
 #include <string>
 
+#ifdef _WIN32
+#else
+#include <sys/stat.h>
+#endif // _WIN32
+
 #ifdef LLAMA_SHARED
 # if defined(_WIN32) && !defined(__MINGW32__)
 #  ifdef LLAMA_BUILD
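(Style note on this hunk: the #ifdef _WIN32 branch is deliberately empty and only routes non-Windows builds to <sys/stat.h>; the more idiomatic spelling would be #ifndef _WIN32 / #include <sys/stat.h> / #endif. Nothing in the visible context uses the header, so it is presumably consumed by code outside this hunk.)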
@@ -18022,7 +18022,7 @@ bool llama_switch_derived_model(struct llama_context* ctx, const std::string der
 
     auto& cparams = ctx->cparams;
     cparams.derived_model_name = (ctx->derived_models.find(derived_model_name) == ctx->derived_models.end()) ? BASE_MODEL : derived_model_name;
-    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name);
+    LLAMA_LOG_INFO("%s: %s\n", __func__, cparams.derived_model_name.c_str());
 
     return true;
 }
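(This one-line change fixes genuine undefined behavior: cparams.derived_model_name is a std::string, and passing a non-trivially-copyable class type through C varargs for a %s specifier is UB — in practice garbage output or a crash. .c_str() supplies the const char * the format string expects.)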