Remove split metadata when quantizing model shards
This commit is contained in:
parent
65c64dc36f
commit
502d069b14
3 changed files with 18 additions and 0 deletions
11
ggml.c
11
ggml.c
|
@ -21148,6 +21148,17 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
|
|||
return n_kv;
|
||||
}
|
||||
|
||||
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
|
||||
const int idx = gguf_find_key(ctx, key);
|
||||
if (idx >= 0) {
|
||||
const int n_kv = gguf_get_n_kv(ctx);
|
||||
for (int i = idx; i < n_kv; ++i)
|
||||
ctx->kv[i] = ctx->kv[i+1];
|
||||
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
|
||||
ctx->header.n_kv--;
|
||||
}
|
||||
}
|
||||
|
||||
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
||||
const int idx = gguf_get_or_add_key(ctx, key);
|
||||
|
||||
|
|
3
ggml.h
3
ggml.h
|
@ -2289,6 +2289,9 @@ extern "C" {
|
|||
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
||||
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
||||
|
||||
// removes the key-value pair with the given key if it exists; otherwise does nothing
|
||||
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
|
||||
|
||||
// overrides existing values or adds a new one
|
||||
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
||||
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
||||
|
|
|
@ -13514,6 +13514,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
gguf_set_kv (ctx_out, ml.meta);
|
||||
gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
|
||||
gguf_set_val_u32(ctx_out, "general.file_type", ftype);
|
||||
// Remove split metadata
|
||||
gguf_remove_key(ctx_out, "split.no");
|
||||
gguf_remove_key(ctx_out, "split.count");
|
||||
gguf_remove_key(ctx_out, "split.tensors.count");
|
||||
|
||||
if (params->kv_overrides) {
|
||||
const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue