llama : add gguf_remove_key + remove split meta during quantize (#6591)

* Remove split metadata when quantize model shards

* Find metadata key by enum

* Correct loop range for gguf_remove_key and code format

* Free kv memory

---------

Co-authored-by: z5269887 <z5269887@unsw.edu.au>
This commit is contained in:
jiez 2024-04-12 18:45:06 +08:00 committed by GitHub
parent 5c4d767ac0
commit 91c736015b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 47 additions and 25 deletions

View file

@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
gguf_set_kv (ctx_out, ml.meta);
gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
gguf_set_val_u32(ctx_out, "general.file_type", ftype);
// Remove split metadata
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str());
if (params->kv_overrides) {
const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;