llama : refactor model loading code (#2620)

* llama : style formatting + remove helper methods

* llama : fix quantization using gguf tool

* llama : simplify gguf_file_saver

* llama : fix method names

* llama : simplify write_header()

* llama : no need to pass full file loader to the file saver

just gguf_ctx

* llama : gguf_file_saver write I32

* llama : refactor tensor names (#2622)

* gguf: update tensor names searched in quantization

* gguf : define tensor names as constants

* gguf : initial write API (not tested yet)

* gguf : write to file API (not tested)

* gguf : initial write API ready + example

* gguf : fix header write

* gguf : fixes + simplify example + add ggml_nbytes_pad()

* gguf : minor

* llama : replace gguf_file_saver with new gguf write API

* gguf : streaming support when writing files

* gguf : remove oboslete write methods

* gguf : remove obosolete gguf_get_arr_xxx API

* llama : simplify gguf_file_loader

* llama : move hparams and vocab from gguf_file_loader to llama_model_loader

* llama : merge gguf-util.h in llama.cpp

* llama : reorder definitions in .cpp to match .h

* llama : minor simplifications

* llama : refactor llama_model_loader (WIP)

wip : remove ggml_ctx from llama_model_loader

wip : merge gguf_file_loader in llama_model_loader

* llama : fix shape prints

* llama : fix Windows build + fix norm_rms_eps key

* llama : throw error on missing KV paris in model meta data

* llama : improve printing + log meta data

* llama : switch print order of meta data

---------

Co-authored-by: M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
This commit is contained in:
Georgi Gerganov 2023-08-16 14:34:03 +03:00 committed by GitHub
parent ea5615a03a
commit 758ff1bbb5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 1944 additions and 1889 deletions

597
ggml.c
View file

@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
error_desc = "insufficient memory";
break;
}
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
__func__, error_desc, size/(1024.0*1024.0));
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
return NULL;
}
return aligned_memory;
}
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
//
// is enough, but just in case, adding the second part
return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
}
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
}
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@ -16899,7 +16903,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
// compute size of intermediate results
// TODO: does not take into account scratch buffers !!!!
for (int i = 0; i < cgraph->n_nodes; ++i) {
size_eval += ggml_nbytes(cgraph->nodes[i]);
size_eval += ggml_nbytes_pad(cgraph->nodes[i]);
}
// print
@ -18579,6 +18583,19 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
};
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "uint8",
[GGUF_TYPE_INT8] = "int8",
[GGUF_TYPE_UINT16] = "uint16",
[GGUF_TYPE_INT16] = "int16",
[GGUF_TYPE_UINT32] = "uint32",
[GGUF_TYPE_INT32] = "int32",
[GGUF_TYPE_FLOAT32] = "float32",
[GGUF_TYPE_BOOL] = "bool",
[GGUF_TYPE_STRING] = "string",
[GGUF_TYPE_ARRAY] = "array",
};
union gguf_value {
uint8_t uint8;
int8_t int8;
@ -18613,8 +18630,6 @@ struct gguf_header {
uint32_t version;
uint32_t n_tensors;
uint32_t n_kv;
struct gguf_kv * kv;
};
struct gguf_tensor_info {
@ -18622,44 +18637,69 @@ struct gguf_tensor_info {
uint32_t n_dims;
uint32_t ne[GGML_MAX_DIMS];
uint32_t n_elms; // TODO: is this needed?
enum ggml_type type;
uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
// for writing API
const void * data;
size_t size;
};
struct gguf_context {
struct gguf_header header;
struct gguf_header header;
struct gguf_kv * kv;
struct gguf_tensor_info * infos;
size_t alignment;
size_t offset; // offset of `data` from beginning of file
size_t size_data; // size of `data` in bytes
size_t size; // size of `data` in bytes
//uint8_t * padding;
uint8_t * data;
void * data;
};
static bool gguf_fread_el(void * dst, size_t size, FILE * file, size_t * offset) {
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
const size_t n = fread(dst, 1, size, file);
*offset += n;
return n == size;
}
static bool gguf_fread_str(struct gguf_str * p, FILE * file, size_t * offset) {
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
p->n = 0;
p->data = NULL;
bool ok = true;
// TODO: how to avoid mallocs for strings?
ok = ok && gguf_fread_el(&p->n, sizeof(p->n), file, offset); p->data = calloc(p->n + 1, 1);
ok = ok && gguf_fread_el( p->data, p->n, file, offset);
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
return ok;
}
struct gguf_context * gguf_init_empty(void) {
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
ctx->header.magic = GGUF_MAGIC;
ctx->header.version = GGUF_VERSION;
ctx->header.n_tensors = 0;
ctx->header.n_kv = 0;
ctx->kv = NULL;
ctx->infos = NULL;
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
ctx->offset = 0;
ctx->size = 0;
ctx->data = NULL;
return ctx;
}
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = fopen(fname, "rb");
if (!file) {
@ -18673,7 +18713,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// check the magic before making allocations
{
gguf_fread_el(&magic, sizeof(magic), file, &offset);
gguf_fread_el(file, &magic, sizeof(magic), &offset);
if (magic != GGUF_MAGIC) {
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
@ -18689,14 +18729,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// read the header
{
ctx->header.magic = magic;
ctx->header.kv = NULL;
ctx->kv = NULL;
ctx->infos = NULL;
ctx->data = NULL;
ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
if (!ok) {
fprintf(stderr, "%s: failed to read header\n", __func__);
@ -18708,33 +18748,33 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// read the kv pairs
{
ctx->header.kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
struct gguf_kv * kv = &ctx->header.kv[i];
struct gguf_kv * kv = &ctx->kv[i];
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
ok = ok && gguf_fread_str(&kv->key, file, &offset);
//ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
ok = ok && gguf_fread_str(file, &kv->key, &offset);
//ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
switch (kv->type) {
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
switch (kv->value.arr.type) {
case GGUF_TYPE_UINT8:
@ -18747,17 +18787,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_BOOL:
{
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
} break;
case GGUF_TYPE_STRING:
{
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
};
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
@ -18787,14 +18827,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
info->ne[j] = 1;
}
ok = ok && gguf_fread_str(&info->name, file, &offset);
ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
ok = ok && gguf_fread_str(file, &info->name, &offset);
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
for (uint32_t j = 0; j < info->n_dims; ++j) {
ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
}
//ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
if (!ok) {
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
@ -18827,8 +18866,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// compute the total size of the data section, taking into account the alignment
{
ctx->size_data = 0;
ctx->size = 0;
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
struct gguf_tensor_info * info = &ctx->infos[i];
@ -18848,7 +18886,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
ctx->size += GGML_PAD(size_cur, ctx->alignment);
}
}
@ -18862,7 +18900,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
const size_t mem_size =
params.no_alloc ?
(ctx->header.n_tensors )*ggml_tensor_overhead() :
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
struct ggml_init_params pdata = {
.mem_size = mem_size,
@ -18877,12 +18915,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
struct ggml_tensor * data = NULL;
if (params.no_alloc == false) {
data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data);
data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
ok = ok && data != NULL;
// read the binary blob with the tensor data
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
if (!ok) {
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
@ -18944,10 +18982,10 @@ void gguf_free(struct gguf_context * ctx) {
return;
}
if (ctx->header.kv) {
if (ctx->kv) {
// free string memory - not great..
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
struct gguf_kv * kv = &ctx->header.kv[i];
struct gguf_kv * kv = &ctx->kv[i];
if (kv->key.data) {
free(kv->key.data);
@ -18974,7 +19012,7 @@ void gguf_free(struct gguf_context * ctx) {
}
}
GGML_ALIGNED_FREE(ctx->header.kv);
GGML_ALIGNED_FREE(ctx->kv);
}
if (ctx->infos) {
@ -18992,6 +19030,10 @@ void gguf_free(struct gguf_context * ctx) {
GGML_ALIGNED_FREE(ctx);
}
const char * gguf_type_name(enum gguf_type type) {
return GGUF_TYPE_NAME[type];
}
int gguf_get_version(struct gguf_context * ctx) {
return ctx->header.version;
}
@ -19014,8 +19056,9 @@ int gguf_get_n_kv(struct gguf_context * ctx) {
int gguf_find_key(struct gguf_context * ctx, const char * key) {
// return -1 if key not found
int keyfound = -1;
const int n_kv = gguf_get_n_kv(ctx);
int keyfound = -1;
for (int i = 0; i < n_kv; ++i) {
if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
@ -19028,71 +19071,87 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
}
const char * gguf_get_key(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].key.data;
return ctx->kv[i].key.data;
}
enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].type;
return ctx->kv[i].type;
}
enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.arr.type;
return ctx->kv[i].value.arr.type;
}
const void * gguf_get_arr_data(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.arr.data;
}
const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
struct gguf_kv * kv = &ctx->header.kv[key_id];
struct gguf_kv * kv = &ctx->kv[key_id];
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
return str->data;
}
float gguf_get_arr_f32(struct gguf_context * ctx, int key_id, int i) {
return ((float *) ctx->header.kv[key_id].value.arr.data)[i];
}
int gguf_get_arr_n(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.arr.n;
return ctx->kv[i].value.arr.n;
}
uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.uint8;
return ctx->kv[i].value.uint8;
}
int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.int8;
return ctx->kv[i].value.int8;
}
uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.uint16;
return ctx->kv[i].value.uint16;
}
int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.int16;
return ctx->kv[i].value.int16;
}
uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.uint32;
return ctx->kv[i].value.uint32;
}
int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.int32;
return ctx->kv[i].value.int32;
}
float gguf_get_val_f32(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.float32;
return ctx->kv[i].value.float32;
}
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.bool_;
return ctx->kv[i].value.bool_;
}
const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
return ctx->header.kv[i].value.str.data;
return ctx->kv[i].value.str.data;
}
int gguf_get_n_tensors(struct gguf_context * ctx) {
return ctx->header.n_tensors;
}
int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
// return -1 if tensor not found
int tensorfound = -1;
const int n_tensors = gguf_get_n_tensors(ctx);
for (int i = 0; i < n_tensors; ++i) {
if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
tensorfound = i;
break;
}
}
return tensorfound;
}
size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
return ctx->infos[i].offset;
}
@ -19101,6 +19160,400 @@ char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
return ctx->infos[i].name.data;
}
// returns the index
static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
const int idx = gguf_find_key(ctx, key);
if (idx >= 0) {
return idx;
}
const int n_kv = gguf_get_n_kv(ctx);
ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
ctx->kv[n_kv].key.n = strlen(key) + 1;
ctx->kv[n_kv].key.data = strdup(key);
ctx->header.n_kv++;
return n_kv;
}
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_UINT8;
ctx->kv[idx].value.uint8 = val;
}
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_INT8;
ctx->kv[idx].value.int8 = val;
}
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_UINT16;
ctx->kv[idx].value.uint16 = val;
}
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_INT16;
ctx->kv[idx].value.int16 = val;
}
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_UINT32;
ctx->kv[idx].value.uint32 = val;
}
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_INT32;
ctx->kv[idx].value.int32 = val;
}
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_FLOAT32;
ctx->kv[idx].value.float32 = val;
}
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_BOOL;
ctx->kv[idx].value.bool_ = val;
}
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_STRING;
ctx->kv[idx].value.str.n = strlen(val) + 1;
ctx->kv[idx].value.str.data = strdup(val);
}
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = type;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
}
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
for (int i = 0; i < n; i++) {
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
str->n = strlen(data[i]) + 1;
str->data = strdup(data[i]);
}
}
// set or add KV pairs from another context
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
for (uint32_t i = 0; i < src->header.n_kv; i++) {
switch (src->kv[i].type) {
case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_ARRAY:
{
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
}
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
free(data);
} if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
GGML_ASSERT(false && "nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
}
}
}
void gguf_add_tensor(
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
const int idx = ctx->header.n_tensors;
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
ctx->infos[idx].name.n = strlen(tensor->name) + 1;
ctx->infos[idx].name.data = strdup(tensor->name);
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
ctx->infos[idx].ne[i] = 1;
}
ctx->infos[idx].n_dims = tensor->n_dims;
for (int i = 0; i < tensor->n_dims; i++) {
ctx->infos[idx].ne[i] = tensor->ne[i];
}
ctx->infos[idx].type = tensor->type;
ctx->infos[idx].offset = 0;
ctx->infos[idx].data = tensor->data;
ctx->infos[idx].size = ggml_nbytes(tensor);
if (ctx->header.n_tensors > 0) {
ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
}
ctx->header.n_tensors++;
}
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
GGML_ASSERT(false && "tensor not found");
}
ctx->infos[idx].type = type;
}
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
GGML_ASSERT(false && "tensor not found");
}
ctx->infos[idx].data = data;
ctx->infos[idx].size = size;
// update offsets
for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
}
}
//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
// fwrite(&val->n, sizeof(val->n), 1, file);
// fwrite(val->data, sizeof(char), val->n, file);
//}
//
//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
// fwrite(val, sizeof(char), size, file);
//}
struct gguf_buf {
void * data;
size_t size;
size_t offset;
};
static struct gguf_buf gguf_buf_init(size_t size) {
struct gguf_buf buf = {
/*buf.data =*/ size == 0 ? NULL : malloc(size),
/*buf.size =*/ size,
/*buf.offset =*/ 0,
};
return buf;
}
static void gguf_buf_free(struct gguf_buf buf) {
if (buf.data) {
free(buf.data);
}
}
static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
if (buf->offset + size > buf->size) {
buf->size = 1.5*(buf->offset + size);
if (buf->data) {
buf->data = realloc(buf->data, buf->size);
}
}
}
static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
gguf_buf_grow(buf, sizeof(val->n) + val->n);
buf->data && memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
buf->offset += sizeof(val->n);
buf->data && memcpy((char *) buf->data + buf->offset, val->data, val->n);
buf->offset += val->n;
}
static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
gguf_buf_grow(buf, el_size);
buf->data && memcpy((char *) buf->data + buf->offset, val, el_size);
buf->offset += el_size;
}
static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
// write header
gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
// write key-value pairs
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
struct gguf_kv * kv = &ctx->kv[i];
gguf_bwrite_str(buf, &kv->key);
gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
switch (kv->type) {
case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_ARRAY:
{
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) );
switch (kv->value.arr.type) {
case GGUF_TYPE_UINT8:
case GGUF_TYPE_INT8:
case GGUF_TYPE_UINT16:
case GGUF_TYPE_INT16:
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
case GGUF_TYPE_BOOL:
{
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
} break;
case GGUF_TYPE_STRING:
{
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
};
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
};
}
// write tensor infos
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
struct gguf_tensor_info * info = &ctx->infos[i];
gguf_bwrite_str(buf, &info->name);
gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
for (uint32_t j = 0; j < info->n_dims; ++j) {
gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
}
gguf_bwrite_el(buf, &info->type, sizeof(info->type));
gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
}
// we require the data section to be aligned, so take into account any padding
{
const size_t offset = buf->offset;
const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
if (offset_pad != offset) {
uint8_t pad = 0;
for (size_t i = 0; i < offset_pad - offset; ++i) {
gguf_bwrite_el(buf, &pad, sizeof(pad));
}
}
}
if (only_meta) {
return;
}
size_t offset = 0;
// write tensor data
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
struct gguf_tensor_info * info = &ctx->infos[i];
const size_t size = info->size;
const size_t size_pad = GGML_PAD(size, ctx->alignment);
gguf_bwrite_el(buf, info->data, size);
if (size_pad != size) {
uint8_t pad = 0;
for (size_t j = 0; j < size_pad - size; ++j) {
gguf_bwrite_el(buf, &pad, sizeof(pad));
}
}
GGML_ASSERT(offset == info->offset);
offset += size_pad;
}
}
void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = fopen(fname, "wb");
if (!file) {
GGML_ASSERT(false && "failed to open file for writing");
}
struct gguf_buf buf = gguf_buf_init(16*1024);
gguf_write_to_buf(ctx, &buf, only_meta);
fwrite(buf.data, 1, buf.offset, file);
gguf_buf_free(buf);
fclose(file);
}
size_t gguf_get_meta_size(struct gguf_context * ctx) {
// no allocs - only compute size
struct gguf_buf buf = gguf_buf_init(0);
gguf_write_to_buf(ctx, &buf, true);
return buf.offset;
}
void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
struct gguf_buf buf = gguf_buf_init(16*1024);
gguf_write_to_buf(ctx, &buf, true);
memcpy(data, buf.data, buf.offset);
gguf_buf_free(buf);
}
////////////////////////////////////////////////////////////////////////////////
int ggml_cpu_has_avx(void) {