diff --git a/ggml.c b/ggml.c
index 793b67f4c..15d822789 100644
--- a/ggml.c
+++ b/ggml.c
@@ -21148,6 +21148,17 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     return n_kv;
 }
 
+void gguf_remove_key(struct gguf_context * ctx, const char * key) {
+    const int idx = gguf_find_key(ctx, key);
+    if (idx >= 0) {
+        const int n_kv = gguf_get_n_kv(ctx);
+        for (int i = idx; i < n_kv - 1; ++i)
+            ctx->kv[i] = ctx->kv[i+1];
+        ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
+        ctx->header.n_kv--;
+    }
+}
+
 void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
     const int idx = gguf_get_or_add_key(ctx, key);
 
diff --git a/ggml.h b/ggml.h
index abe3767f2..e9ed8eeee 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2289,6 +2289,9 @@ extern "C" {
     GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
     GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
 
+    // removes key if it exists
+    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
     // overrides existing values or adds a new one
     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
     GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
diff --git a/llama.cpp b/llama.cpp
index 8dbf47486..f4d290d47 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13514,6 +13514,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     gguf_set_kv     (ctx_out, ml.meta);
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
     gguf_set_val_u32(ctx_out, "general.file_type", ftype);
+    // Remove split metadata
+    gguf_remove_key(ctx_out, "split.no");
+    gguf_remove_key(ctx_out, "split.count");
+    gguf_remove_key(ctx_out, "split.tensors.count");
 
     if (params->kv_overrides) {
         const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
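
Not part of the patch: a minimal sketch of how the new gguf_remove_key helper could be exercised on its own, assuming only the public gguf_* API already declared in ggml.h (gguf_init_from_file, gguf_write_to_file, gguf_free). The split.* key names are the same ones removed in llama_model_quantize_internal above; the main() harness and file names are illustrative.

// sketch only (not part of this change): strip split metadata from a GGUF file
// using the new gguf_remove_key() together with the existing gguf_* API
#include "ggml.h"

#include <stdio.h>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <in.gguf> <out.gguf>\n", argv[0]);
        return 1;
    }

    struct ggml_context * meta = NULL;

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,   // metadata only - do not allocate tensor data
        /*.ctx      =*/ &meta,
    };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // removing a key that is not present is a no-op
    gguf_remove_key(ctx, "split.no");
    gguf_remove_key(ctx, "split.count");
    gguf_remove_key(ctx, "split.tensors.count");

    // write only the updated metadata section (tensor data is not copied here)
    gguf_write_to_file(ctx, argv[2], /*only_meta =*/ true);

    gguf_free(ctx);
    ggml_free(meta);

    return 0;
}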