diff --git a/llama.cpp b/llama.cpp index f4d290d47..720786206 100644 --- a/llama.cpp +++ b/llama.cpp @@ -13515,9 +13515,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION); gguf_set_val_u32(ctx_out, "general.file_type", ftype); // Remove split metadata - gguf_remove_key(ctx_out, "split.no"); - gguf_remove_key(ctx_out, "split.count"); - gguf_remove_key(ctx_out, "split.tensors.count"); + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str()); + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str()); + gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str()); if (params->kv_overrides) { const std::vector & overrides = *(const std::vector *)params->kv_overrides;