Remove split metadata when quantize model shards

2024-04-11 00:39:15 +08:00 · 2024-04-11 00:39:15 +08:00 · 502d069b14
commit 502d069b14
parent 65c64dc36f
3 changed files with 18 additions and 0 deletions
--- a/ggml.c
+++ b/ggml.c
@ -21148,6 +21148,17 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
    return n_kv;
 }

+void gguf_remove_key(struct gguf_context * ctx, const char * key) {
+    const int idx = gguf_find_key(ctx, key);
+    if (idx >= 0) {
+        const int n_kv = gguf_get_n_kv(ctx);
+        for (int i = idx; i < n_kv; ++i)
+            ctx->kv[i] = ctx->kv[i+1];
+        ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
+        ctx->header.n_kv--;
+    }
+}
+
 void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
    const int idx = gguf_get_or_add_key(ctx, key);

--- a/ggml.h
+++ b/ggml.h
@ -2289,6 +2289,9 @@ extern "C" {
    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);

+    // removes key if it exists
+    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
    // overrides existing values or adds a new one
    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
--- a/llama.cpp
+++ b/llama.cpp
@ -13514,6 +13514,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    gguf_set_kv     (ctx_out, ml.meta);
    gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
    gguf_set_val_u32(ctx_out, "general.file_type", ftype);
+    // Remove split metadata
+    gguf_remove_key(ctx_out, "split.no");
+    gguf_remove_key(ctx_out, "split.count");
+    gguf_remove_key(ctx_out, "split.tensors.count");

    if (params->kv_overrides) {
        const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;