quantize : be able to override metadata by key (#6321)

* quantize : be able to override metadata by key

* minor : spacing

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

parent deb7240100
commit d25b1c31b0

3 changed files with 96 additions and 27 deletions

llama.cpp | 38
@@ -12776,7 +12776,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     constexpr bool use_mmap = false;
 #endif
 
-    llama_model_loader ml(fname_inp, use_mmap, NULL);
+    llama_model_kv_override * kv_overrides = nullptr;
+    if (params->kv_overrides) {
+        auto v = (std::vector<llama_model_kv_override>*)params->kv_overrides;
+        kv_overrides = v->data();
+    }
+    llama_model_loader ml(fname_inp, use_mmap, kv_overrides);
     ml.init_mappings(false); // no prefetching?
 
     llama_model model;
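The public `kv_overrides` field stays an opaque pointer to a caller-owned `std::vector<llama_model_kv_override>`; the loader only sees `v->data()`, and both the loader and the write-out loop in the next hunk stop at the first entry whose key begins with `'\0'`, so the vector needs a zero-key sentinel at the end. A minimal caller-side sketch (struct, enum, and field names are taken from this diff; the chosen key and value are purely illustrative):

```cpp
#include <cstring>
#include <vector>

#include "llama.h"

// Build a caller-owned override list in the shape this change expects:
// real entries first, then one sentinel entry whose key starts with '\0'.
static std::vector<llama_model_kv_override> make_kv_overrides() {
    std::vector<llama_model_kv_override> kv_overrides;

    llama_model_kv_override o = {};
    // illustrative key/value only - any GGUF metadata key could be used here
    std::strncpy(o.key, "tokenizer.ggml.add_bos_token", sizeof(o.key) - 1);
    o.tag        = LLAMA_KV_OVERRIDE_TYPE_BOOL;
    o.bool_value = false;
    kv_overrides.push_back(o);

    // sentinel: the iteration in llama_model_quantize_internal breaks on key[0] == 0
    llama_model_kv_override sentinel = {};
    kv_overrides.push_back(sentinel);

    return kv_overrides;
}
```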
@@ -12805,6 +12810,22 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
     gguf_set_val_u32(ctx_out, "general.file_type", ftype);
 
+    if (params->kv_overrides) {
+        const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
+        for (auto & o : overrides) {
+            if (o.key[0] == 0) break;
+            if (o.tag == LLAMA_KV_OVERRIDE_TYPE_FLOAT) {
+                gguf_set_val_f32(ctx_out, o.key, o.float_value);
+            } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_INT) {
+                gguf_set_val_i32(ctx_out, o.key, o.int_value);
+            } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_BOOL) {
+                gguf_set_val_bool(ctx_out, o.key, o.bool_value);
+            } else {
+                LLAMA_LOG_WARN("%s: unknown KV override type for key %s\n", __func__, o.key);
+            }
+        }
+    }
+
     for (int i = 0; i < ml.n_tensors; ++i) {
         const struct ggml_tensor * meta = ml.get_tensor_meta(i);
 
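This override loop runs after the source model's metadata has been copied into `ctx_out` earlier in the function (not shown in this hunk), and, as far as I can tell from the gguf API of this vintage, `gguf_set_val_*` replaces the value when the key already exists rather than appending a duplicate, which is what makes the late overrides win. A small standalone sketch of that assumption (key and values are illustrative; the gguf functions were declared in `ggml.h` at the time):

```cpp
#include <cstdio>

#include "ggml.h" // gguf_* API was declared here at the time of this change

int main() {
    struct gguf_context * ctx_out = gguf_init_empty();

    // first write stands in for the value copied from the input model,
    // second write stands in for a metadata override supplied by the caller
    gguf_set_val_u32(ctx_out, "general.quantization_version", 2);
    gguf_set_val_u32(ctx_out, "general.quantization_version", 3);

    const int id = gguf_find_key(ctx_out, "general.quantization_version");
    printf("n_kv = %d, value = %u\n",
           gguf_get_n_kv(ctx_out), (unsigned) gguf_get_val_u32(ctx_out, id));
    // expected: n_kv = 1, value = 3 (the later write replaced the earlier one)

    gguf_free(ctx_out);
    return 0;
}
```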
@@ -12813,21 +12834,17 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // TODO: avoid hardcoded tensor names - use the TN_* constants
         if (name.find("attn_v.weight") != std::string::npos || name.find("attn_qkv.weight") != std::string::npos) {
             ++qs.n_attention_wv;
-        }
-        else if (name.find("ffn_down") != std::string::npos) {
+        } else if (name.find("ffn_down") != std::string::npos) {
             ++qs.n_ffn_down;
-        }
-        else if (name.find("ffn_gate") != std::string::npos) {
+        } else if (name.find("ffn_gate") != std::string::npos) {
             ++qs.n_ffn_gate;
-        }
-        else if (name.find("ffn_up") != std::string::npos) {
+        } else if (name.find("ffn_up") != std::string::npos) {
             ++qs.n_ffn_up;
-        }
-        else if (name == LLM_TN(model.arch)(LLM_TENSOR_OUTPUT, "weight")) {
+        } else if (name == LLM_TN(model.arch)(LLM_TENSOR_OUTPUT, "weight")) {
             qs.has_output = true;
         }
     }
-    if (qs.n_attention_wv != qs.n_ffn_down || (uint32_t)qs.n_attention_wv != model.hparams.n_layer) {
+    if (qs.n_attention_wv != qs.n_ffn_down || (uint32_t) qs.n_attention_wv != model.hparams.n_layer) {
         LLAMA_LOG_WARN("%s ============ Strange model: n_attention_wv = %d, n_ffn_down = %d, hparams.n_layer = %d\n",
                 __func__, qs.n_attention_wv, qs.n_ffn_down, model.hparams.n_layer);
     }
@@ -13363,6 +13380,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.only_copy    =*/ false,
         /*.pure         =*/ false,
         /*.imatrix      =*/ nullptr,
+        /*.kv_overrides =*/ nullptr,
     };
 
     return result;
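Putting the pieces together, a caller could hand an override vector to the quantizer through the new field. A sketch under the assumption that the public entry point is the usual `llama_model_quantize()` and that `ftype` / `LLAMA_FTYPE_MOSTLY_Q4_K_M` come from the public header; the override key, its value, and the file paths are placeholders:

```cpp
#include <cstring>
#include <vector>

#include "llama.h"

int main() {
    // one placeholder override plus the zero-key sentinel the override loop expects
    std::vector<llama_model_kv_override> kv_overrides(2);
    std::strncpy(kv_overrides[0].key, "example.int.key", sizeof(kv_overrides[0].key) - 1);
    kv_overrides[0].tag       = LLAMA_KV_OVERRIDE_TYPE_INT;
    kv_overrides[0].int_value = 42;       // placeholder value
    kv_overrides[1].key[0]    = '\0';     // sentinel entry

    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype        = LLAMA_FTYPE_MOSTLY_Q4_K_M; // placeholder target type
    params.kv_overrides = &kv_overrides;             // opaque pointer, cast back inside llama.cpp

    // placeholder paths; returns 0 on success
    return (int) llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &params);
}
```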