llama : add gguf_remove_key + remove split meta during quantize (#6591)
* Remove split metadata when quantize model shards * Find metadata key by enum * Correct loop range for gguf_remove_key and code format * Free kv memory --------- Co-authored-by: z5269887 <z5269887@unsw.edu.au>
This commit is contained in:
parent
5c4d767ac0
commit
91c736015b
3 changed files with 47 additions and 25 deletions
65
ggml.c
65
ggml.c
|
@ -20550,6 +20550,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|||
return ok;
|
||||
}
|
||||
|
||||
static void gguf_free_kv(struct gguf_kv * kv) {
|
||||
if (kv->key.data) {
|
||||
GGML_FREE(kv->key.data);
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_STRING) {
|
||||
if (kv->value.str.data) {
|
||||
GGML_FREE(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||
if (kv->value.arr.data) {
|
||||
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
GGML_FREE(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_FREE(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct gguf_context * gguf_init_empty(void) {
|
||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||
|
||||
|
@ -20899,31 +20925,7 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
if (ctx->kv) {
|
||||
// free string memory - not great..
|
||||
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||
struct gguf_kv * kv = &ctx->kv[i];
|
||||
|
||||
if (kv->key.data) {
|
||||
GGML_FREE(kv->key.data);
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_STRING) {
|
||||
if (kv->value.str.data) {
|
||||
GGML_FREE(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||
if (kv->value.arr.data) {
|
||||
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
GGML_FREE(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_FREE(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
gguf_free_kv(&ctx->kv[i]);
|
||||
}
|
||||
|
||||
GGML_FREE(ctx->kv);
|
||||
|
@ -21148,6 +21150,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
|
|||
return n_kv;
|
||||
}
|
||||
|
||||
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
|
||||
const int idx = gguf_find_key(ctx, key);
|
||||
if (idx >= 0) {
|
||||
const int n_kv = gguf_get_n_kv(ctx);
|
||||
gguf_free_kv(&ctx->kv[idx]);
|
||||
for (int i = idx; i < n_kv-1; ++i) {
|
||||
ctx->kv[i] = ctx->kv[i+1];
|
||||
}
|
||||
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
|
||||
ctx->header.n_kv--;
|
||||
}
|
||||
}
|
||||
|
||||
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
||||
const int idx = gguf_get_or_add_key(ctx, key);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue