gguf : streaming support when writing files
This commit is contained in:
parent
4ef5e792e3
commit
f7a6aa9911
4 changed files with 233 additions and 70 deletions
|
@ -72,7 +72,7 @@ bool gguf_ex_write(const std::string & fname) {
|
||||||
gguf_add_tensor(ctx, cur);
|
gguf_add_tensor(ctx, cur);
|
||||||
}
|
}
|
||||||
|
|
||||||
gguf_write_to_file(ctx, fname.c_str());
|
gguf_write_to_file(ctx, fname.c_str(), false);
|
||||||
|
|
||||||
fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str());
|
fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str());
|
||||||
|
|
||||||
|
|
208
ggml.c
208
ggml.c
|
@ -19123,6 +19123,22 @@ int gguf_get_n_tensors(struct gguf_context * ctx) {
|
||||||
return ctx->header.n_tensors;
|
return ctx->header.n_tensors;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// look up a tensor by name
// returns the index of the first tensor whose name compares equal to `name`, or -1 if there is none
int gguf_find_tensor(struct gguf_context * ctx, const char * name) {
    const int count = gguf_get_n_tensors(ctx);

    int idx = 0;
    while (idx < count) {
        if (strcmp(name, gguf_get_tensor_name(ctx, idx)) == 0) {
            return idx;
        }
        idx++;
    }

    // no tensor with this name
    return -1;
}
|
||||||
|
|
||||||
size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
|
size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) {
|
||||||
return ctx->infos[i].offset;
|
return ctx->infos[i].offset;
|
||||||
}
|
}
|
||||||
|
@ -19269,12 +19285,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gguf_add_tensor_ex(
|
void gguf_add_tensor(
|
||||||
struct gguf_context * ctx,
|
struct gguf_context * ctx,
|
||||||
const struct ggml_tensor * tensor,
|
const struct ggml_tensor * tensor) {
|
||||||
enum ggml_type type,
|
|
||||||
const void * data,
|
|
||||||
size_t size) {
|
|
||||||
const int idx = ctx->header.n_tensors;
|
const int idx = ctx->header.n_tensors;
|
||||||
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
||||||
|
|
||||||
|
@ -19290,10 +19303,10 @@ void gguf_add_tensor_ex(
|
||||||
ctx->infos[idx].ne[i] = tensor->ne[i];
|
ctx->infos[idx].ne[i] = tensor->ne[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->infos[idx].type = type;
|
ctx->infos[idx].type = tensor->type;
|
||||||
ctx->infos[idx].offset = 0;
|
ctx->infos[idx].offset = 0;
|
||||||
ctx->infos[idx].data = data;
|
ctx->infos[idx].data = tensor->data;
|
||||||
ctx->infos[idx].size = size;
|
ctx->infos[idx].size = ggml_nbytes(tensor);
|
||||||
|
|
||||||
if (ctx->header.n_tensors > 0) {
|
if (ctx->header.n_tensors > 0) {
|
||||||
ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
|
ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ctx->infos[idx - 1].size, ctx->alignment);
|
||||||
|
@ -19302,52 +19315,115 @@ void gguf_add_tensor_ex(
|
||||||
ctx->header.n_tensors++;
|
ctx->header.n_tensors++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor) {
|
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
|
||||||
gguf_add_tensor_ex(ctx, tensor, tensor->type, tensor->data, ggml_nbytes(tensor));
|
const int idx = gguf_find_tensor(ctx, name);
|
||||||
|
if (idx < 0) {
|
||||||
|
GGML_ASSERT(false && "tensor not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
|
ctx->infos[idx].type = type;
|
||||||
fwrite(&val->n, sizeof(val->n), 1, file);
|
|
||||||
fwrite(val->data, sizeof(char), val->n, file);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
|
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
|
||||||
fwrite(val, sizeof(char), size, file);
|
const int idx = gguf_find_tensor(ctx, name);
|
||||||
|
if (idx < 0) {
|
||||||
|
GGML_ASSERT(false && "tensor not found");
|
||||||
}
|
}
|
||||||
|
|
||||||
void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
|
ctx->infos[idx].data = data;
|
||||||
FILE * file = fopen(fname, "wb");
|
ctx->infos[idx].size = size;
|
||||||
if (!file) {
|
|
||||||
GGML_ASSERT(false && "failed to open file for writing");
|
// update offsets
|
||||||
|
for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) {
|
||||||
|
ctx->infos[i].offset = ctx->infos[i - 1].offset + GGML_PAD(ctx->infos[i - 1].size, ctx->alignment);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
|
||||||
|
// fwrite(&val->n, sizeof(val->n), 1, file);
|
||||||
|
// fwrite(val->data, sizeof(char), val->n, file);
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
//static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
|
||||||
|
// fwrite(val, sizeof(char), size, file);
|
||||||
|
//}
|
||||||
|
|
||||||
|
// growable byte buffer used as the destination when serializing a gguf_context
//
// when data == NULL the buffer is in "measure" mode: the writers store nothing and only advance
// offset, which is how the serialized size is computed without allocating
struct gguf_buf {
    void * data;   // backing storage, or NULL in measure mode
    size_t size;   // capacity in bytes
    size_t offset; // number of bytes written so far
};

// create a buffer with an initial capacity of `size` bytes
// size == 0 creates a measure-only buffer (data == NULL, nothing is allocated)
static struct gguf_buf gguf_buf_init(size_t size) {
    struct gguf_buf buf = {
        /*buf.data   =*/ NULL,
        /*buf.size   =*/ size,
        /*buf.offset =*/ 0,
    };

    if (size != 0) {
        buf.data = malloc(size);
        if (buf.data == NULL) {
            // a NULL data pointer would silently switch the buffer to measure mode and drop every write
            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, size);
            abort();
        }
    }

    return buf;
}

// release the storage owned by the buffer (no-op for a measure-only buffer)
static void gguf_buf_free(struct gguf_buf buf) {
    free(buf.data); // free(NULL) is well defined
}

// make sure `size` more bytes can be written at the current offset
// the capacity grows to 1.5x the required size; in measure mode only the bookkeeping is updated
static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
    if (size > SIZE_MAX - buf->offset) {
        fprintf(stderr, "%s: buffer size overflow\n", __func__);
        abort();
    }

    const size_t need = buf->offset + size;
    if (need <= buf->size) {
        return;
    }

    // need + need/2 is the integer form of 1.5*need; fall back to the exact size if that would wrap
    size_t new_size = need;
    if (need/2 <= SIZE_MAX - need) {
        new_size += need/2;
    }

    if (buf->data) {
        // keep the old pointer until realloc is known to have succeeded
        void * data = realloc(buf->data, new_size);
        if (data == NULL) {
            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, new_size);
            abort();
        }
        buf->data = data;
    }

    buf->size = new_size;
}
|
||||||
|
|
||||||
|
static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
|
||||||
|
gguf_buf_grow(buf, sizeof(val->n) + val->n);
|
||||||
|
|
||||||
|
buf->data && memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
|
||||||
|
buf->offset += sizeof(val->n);
|
||||||
|
|
||||||
|
buf->data && memcpy((char *) buf->data + buf->offset, val->data, val->n);
|
||||||
|
buf->offset += val->n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
|
||||||
|
gguf_buf_grow(buf, el_size);
|
||||||
|
|
||||||
|
buf->data && memcpy((char *) buf->data + buf->offset, val, el_size);
|
||||||
|
buf->offset += el_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
|
||||||
// write header
|
// write header
|
||||||
gguf_fwrite_el(file, &ctx->header.magic, sizeof(ctx->header.magic));
|
gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
|
||||||
gguf_fwrite_el(file, &ctx->header.version, sizeof(ctx->header.version));
|
gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
|
||||||
gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
|
gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
|
||||||
gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
|
gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
|
||||||
|
|
||||||
// write key-value pairs
|
// write key-value pairs
|
||||||
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
|
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||||
struct gguf_kv * kv = &ctx->kv[i];
|
struct gguf_kv * kv = &ctx->kv[i];
|
||||||
|
|
||||||
gguf_fwrite_str(file, &kv->key);
|
gguf_bwrite_str(buf, &kv->key);
|
||||||
gguf_fwrite_el (file, &kv->type, sizeof(kv->type));
|
gguf_bwrite_el (buf, &kv->type, sizeof(kv->type));
|
||||||
|
|
||||||
switch (kv->type) {
|
switch (kv->type) {
|
||||||
case GGUF_TYPE_UINT8: gguf_fwrite_el (file, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
|
case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
|
||||||
case GGUF_TYPE_INT8: gguf_fwrite_el (file, &kv->value.int8, sizeof(kv->value.int8) ); break;
|
case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break;
|
||||||
case GGUF_TYPE_UINT16: gguf_fwrite_el (file, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
|
case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
|
||||||
case GGUF_TYPE_INT16: gguf_fwrite_el (file, &kv->value.int16, sizeof(kv->value.int16) ); break;
|
case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break;
|
||||||
case GGUF_TYPE_UINT32: gguf_fwrite_el (file, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
|
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
|
||||||
case GGUF_TYPE_INT32: gguf_fwrite_el (file, &kv->value.int32, sizeof(kv->value.int32) ); break;
|
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
|
||||||
case GGUF_TYPE_FLOAT32: gguf_fwrite_el (file, &kv->value.float32, sizeof(kv->value.float32)); break;
|
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
|
||||||
case GGUF_TYPE_BOOL: gguf_fwrite_el (file, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
|
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
|
||||||
case GGUF_TYPE_STRING: gguf_fwrite_str(file, &kv->value.str ); break;
|
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
{
|
{
|
||||||
gguf_fwrite_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type));
|
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
|
||||||
gguf_fwrite_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n) );
|
gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) );
|
||||||
|
|
||||||
switch (kv->value.arr.type) {
|
switch (kv->value.arr.type) {
|
||||||
case GGUF_TYPE_UINT8:
|
case GGUF_TYPE_UINT8:
|
||||||
|
@ -19359,12 +19435,12 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
|
||||||
case GGUF_TYPE_FLOAT32:
|
case GGUF_TYPE_FLOAT32:
|
||||||
case GGUF_TYPE_BOOL:
|
case GGUF_TYPE_BOOL:
|
||||||
{
|
{
|
||||||
gguf_fwrite_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_STRING:
|
case GGUF_TYPE_STRING:
|
||||||
{
|
{
|
||||||
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
|
||||||
gguf_fwrite_str(file, &((struct gguf_str *) kv->value.arr.data)[j]);
|
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
|
@ -19379,28 +19455,32 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
|
||||||
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
|
||||||
struct gguf_tensor_info * info = &ctx->infos[i];
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
||||||
|
|
||||||
gguf_fwrite_str(file, &info->name);
|
gguf_bwrite_str(buf, &info->name);
|
||||||
gguf_fwrite_el (file, &info->n_dims, sizeof(info->n_dims));
|
gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims));
|
||||||
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
||||||
gguf_fwrite_el(file, &info->ne[j], sizeof(info->ne[j]));
|
gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j]));
|
||||||
}
|
}
|
||||||
gguf_fwrite_el (file, &info->type, sizeof(info->type));
|
gguf_bwrite_el(buf, &info->type, sizeof(info->type));
|
||||||
gguf_fwrite_el (file, &info->offset, sizeof(info->offset));
|
gguf_bwrite_el(buf, &info->offset, sizeof(info->offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
// we require the data section to be aligned, so take into account any padding
|
// we require the data section to be aligned, so take into account any padding
|
||||||
{
|
{
|
||||||
const size_t offset = ftell(file);
|
const size_t offset = buf->offset;
|
||||||
const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
|
const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
|
||||||
|
|
||||||
if (offset_pad != offset) {
|
if (offset_pad != offset) {
|
||||||
uint8_t pad = 0;
|
uint8_t pad = 0;
|
||||||
for (size_t i = 0; i < offset_pad - offset; ++i) {
|
for (size_t i = 0; i < offset_pad - offset; ++i) {
|
||||||
gguf_fwrite_el(file, &pad, sizeof(pad));
|
gguf_bwrite_el(buf, &pad, sizeof(pad));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (only_meta) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
// write tensor data
|
// write tensor data
|
||||||
|
@ -19410,12 +19490,12 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
|
||||||
const size_t size = info->size;
|
const size_t size = info->size;
|
||||||
const size_t size_pad = GGML_PAD(size, ctx->alignment);
|
const size_t size_pad = GGML_PAD(size, ctx->alignment);
|
||||||
|
|
||||||
gguf_fwrite_el(file, info->data, size);
|
gguf_bwrite_el(buf, info->data, size);
|
||||||
|
|
||||||
if (size_pad != size) {
|
if (size_pad != size) {
|
||||||
uint8_t pad = 0;
|
uint8_t pad = 0;
|
||||||
for (size_t j = 0; j < size_pad - size; ++j) {
|
for (size_t j = 0; j < size_pad - size; ++j) {
|
||||||
gguf_fwrite_el(file, &pad, sizeof(pad));
|
gguf_bwrite_el(buf, &pad, sizeof(pad));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19423,10 +19503,44 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
|
||||||
|
|
||||||
offset += size_pad;
|
offset += size_pad;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) {
|
||||||
|
FILE * file = fopen(fname, "wb");
|
||||||
|
if (!file) {
|
||||||
|
GGML_ASSERT(false && "failed to open file for writing");
|
||||||
|
}
|
||||||
|
|
||||||
|
struct gguf_buf buf = gguf_buf_init(16*1024);
|
||||||
|
|
||||||
|
gguf_write_to_buf(ctx, &buf, only_meta);
|
||||||
|
|
||||||
|
fwrite(buf.data, 1, buf.offset, file);
|
||||||
|
|
||||||
|
gguf_buf_free(buf);
|
||||||
|
|
||||||
fclose(file);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t gguf_get_meta_size(struct gguf_context * ctx) {
|
||||||
|
// no allocs - only compute size
|
||||||
|
struct gguf_buf buf = gguf_buf_init(0);
|
||||||
|
|
||||||
|
gguf_write_to_buf(ctx, &buf, true);
|
||||||
|
|
||||||
|
return buf.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gguf_get_meta_data(struct gguf_context * ctx, void * data) {
|
||||||
|
struct gguf_buf buf = gguf_buf_init(16*1024);
|
||||||
|
|
||||||
|
gguf_write_to_buf(ctx, &buf, true);
|
||||||
|
|
||||||
|
memcpy(data, buf.data, buf.offset);
|
||||||
|
|
||||||
|
gguf_buf_free(buf);
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
int ggml_cpu_has_avx(void) {
|
int ggml_cpu_has_avx(void) {
|
||||||
|
|
37
ggml.h
37
ggml.h
|
@ -1712,7 +1712,6 @@ extern "C" {
|
||||||
// gguf
|
// gguf
|
||||||
//
|
//
|
||||||
|
|
||||||
// TODO: can be removed if the API is extended for writing
|
|
||||||
enum gguf_type {
|
enum gguf_type {
|
||||||
GGUF_TYPE_UINT8 = 0,
|
GGUF_TYPE_UINT8 = 0,
|
||||||
GGUF_TYPE_INT8 = 1,
|
GGUF_TYPE_INT8 = 1,
|
||||||
|
@ -1739,6 +1738,7 @@ extern "C" {
|
||||||
GGML_API struct gguf_context * gguf_init_empty(void);
|
GGML_API struct gguf_context * gguf_init_empty(void);
|
||||||
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
||||||
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
||||||
|
|
||||||
GGML_API void gguf_free(struct gguf_context * ctx);
|
GGML_API void gguf_free(struct gguf_context * ctx);
|
||||||
|
|
||||||
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
||||||
|
@ -1770,6 +1770,7 @@ extern "C" {
|
||||||
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
||||||
|
|
||||||
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
||||||
|
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
||||||
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
||||||
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
||||||
|
|
||||||
|
@ -1789,17 +1790,35 @@ extern "C" {
|
||||||
// set or add KV pairs from another context
|
// set or add KV pairs from another context
|
||||||
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
||||||
|
|
||||||
|
// manage tensor info
|
||||||
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
||||||
|
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
||||||
|
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
|
||||||
|
|
||||||
// same as gguf_add_tensor, but allows to override tensor data
|
// writing gguf files can be done in 2 ways:
|
||||||
GGML_API void gguf_add_tensor_ex(
|
//
|
||||||
struct gguf_context * ctx,
|
// - write the entire gguf_context to a binary file in a single pass:
|
||||||
const struct ggml_tensor * tensor,
|
//
|
||||||
enum ggml_type type,
|
// gguf_write_to_file(ctx, fname, false);
|
||||||
const void * data,
|
//
|
||||||
size_t size);
|
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
||||||
|
//
|
||||||
|
// FILE * f = fopen(fname, "wb");
|
||||||
|
// fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
|
||||||
|
// fwrite(..., f); // write the tensor data
|
||||||
|
// void * data = malloc(gguf_get_meta_size(ctx));
// gguf_get_meta_data(ctx, data);
|
||||||
|
// fseek(f, 0, SEEK_SET);
|
||||||
|
// fwrite(data, 1, gguf_get_meta_size(ctx), f);
|
||||||
|
// free(data);
|
||||||
|
// fclose(f);
|
||||||
|
//
|
||||||
|
|
||||||
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname);
|
// write the entire context to a binary file
|
||||||
|
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
||||||
|
|
||||||
|
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
||||||
|
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
||||||
|
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
||||||
|
|
||||||
//
|
//
|
||||||
// system info
|
// system info
|
||||||
|
|
|
@ -83,6 +83,13 @@ static std::string to_string(const T & val) {
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// write n zero bytes to the stream, one byte per write call
static void zeros(std::ofstream & file, size_t n) {
    const char zero_byte = 0;

    size_t remaining = n;
    while (remaining > 0) {
        file.write(&zero_byte, 1);
        --remaining;
    }
}
|
||||||
|
|
||||||
#if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
|
#if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
|
||||||
#include "ggml-alloc.h"
|
#include "ggml-alloc.h"
|
||||||
#define LLAMA_USE_ALLOCATOR
|
#define LLAMA_USE_ALLOCATOR
|
||||||
|
@ -3049,7 +3056,6 @@ static void llama_convert_tensor_internal(const gguf_load_tensor & tensor, std::
|
||||||
for (auto & worker : workers) {
|
for (auto & worker : workers) {
|
||||||
worker.join();
|
worker.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
|
static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
|
||||||
|
@ -3087,6 +3093,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
|
|
||||||
std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false));
|
std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false));
|
||||||
|
|
||||||
|
const size_t align = GGUF_DEFAULT_ALIGNMENT;
|
||||||
struct gguf_context * ctx_out = gguf_init_empty();
|
struct gguf_context * ctx_out = gguf_init_empty();
|
||||||
|
|
||||||
// copy the KV pairs from the input file
|
// copy the KV pairs from the input file
|
||||||
|
@ -3125,7 +3132,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
std::vector<uint8_t> read_data;
|
std::vector<uint8_t> read_data;
|
||||||
std::vector<uint8_t> work;
|
std::vector<uint8_t> work;
|
||||||
|
|
||||||
std::vector<std::vector<uint8_t>> work_map(model_loader->tensors_map.tensors.size());
|
for (gguf_load_tensor & tensor : model_loader->tensors_map.tensors) {
|
||||||
|
gguf_add_tensor(ctx_out, tensor.ggml_tensor);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ofstream fout(fname_out, std::ios::binary);
|
||||||
|
|
||||||
|
const size_t meta_size = gguf_get_meta_size(ctx_out);
|
||||||
|
|
||||||
|
LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);
|
||||||
|
|
||||||
|
// placeholder for the meta data
|
||||||
|
::zeros(fout, meta_size);
|
||||||
|
|
||||||
for (gguf_load_tensor & tensor : model_loader->tensors_map.tensors) {
|
for (gguf_load_tensor & tensor : model_loader->tensors_map.tensors) {
|
||||||
read_data.resize(tensor.size);
|
read_data.resize(tensor.size);
|
||||||
|
@ -3286,13 +3304,25 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
total_size_org += tensor.size;
|
total_size_org += tensor.size;
|
||||||
total_size_new += new_size;
|
total_size_new += new_size;
|
||||||
|
|
||||||
// TODO: temp fix until we have stream support in gguf
|
// update the gguf meta data as we go
|
||||||
work_map[idx - 1] = std::vector<uint8_t>((char *) new_data, (char *) new_data + new_size);
|
gguf_set_tensor_type(ctx_out, tensor.name.c_str(), new_type);
|
||||||
|
gguf_set_tensor_data(ctx_out, tensor.name.c_str(), new_data, new_size);
|
||||||
|
|
||||||
gguf_add_tensor_ex(ctx_out, tensor.ggml_tensor, new_type, work_map[idx - 1].data(), new_size);
|
// write tensor data + padding
|
||||||
|
fout.write((const char *) new_data, new_size);
|
||||||
|
zeros(fout, GGML_PAD(new_size, align) - new_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
gguf_write_to_file(ctx_out, fname_out.c_str());
|
// go back to beginning of file and write the updated meta data
|
||||||
|
{
|
||||||
|
fout.seekp(0);
|
||||||
|
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||||
|
gguf_get_meta_data(ctx_out, data.data());
|
||||||
|
fout.write((const char *) data.data(), data.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
fout.close();
|
||||||
|
|
||||||
gguf_free(ctx_out);
|
gguf_free(ctx_out);
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
|
LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue