llama : load model from buffer
This commit is contained in:
parent
fc54ef0d1c
commit
112b6647c4
2 changed files with 79 additions and 54 deletions
|
@ -2284,7 +2284,7 @@ extern "C" {
|
|||
|
||||
GGML_API struct gguf_context * gguf_init_empty(void);
|
||||
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
||||
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
||||
// Build a gguf_context by parsing GGUF data held in memory instead of in a file.
//   buffer - start of the GGUF data; passing NULL makes the call fail and return NULL
//   size   - number of bytes readable at `buffer`
//   params - same initialization parameters as accepted by gguf_init_from_file
// Returns NULL on failure.
// NOTE(review): whether `buffer` must stay valid after this call returns is not stated here - confirm.
GGML_API struct gguf_context * gguf_init_from_buffer(const char * buffer, size_t size, struct gguf_init_params params);
|
||||
|
||||
GGML_API void gguf_free(struct gguf_context * ctx);
|
||||
|
||||
|
|
131
ggml/src/ggml.c
131
ggml/src/ggml.c
|
@ -20801,6 +20801,13 @@ struct gguf_context {
|
|||
void * data;
|
||||
};
|
||||
|
||||
// Describes where the GGUF bytes come from while parsing: either a stdio
// stream (`file`) or an in-memory region (`buffer` / `size`).
// gguf_fread_el reads from `file` when it is non-NULL and from `buffer` otherwise.
struct gguf_src {
|
||||
// stream to read from; NULL selects the in-memory buffer below
FILE * file;
|
||||
// for reading gguf from a buffer instead of a file
|
||||
const char * buffer;
|
||||
// number of bytes available at `buffer`
size_t size;
|
||||
};
|
||||
|
||||
static size_t gguf_type_size(enum gguf_type type) {
|
||||
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
|
||||
return GGUF_TYPE_SIZE[type];
|
||||
|
@ -20820,19 +20827,25 @@ static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
|
|||
GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
|
||||
}
|
||||
|
||||
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
|
||||
const size_t n = fread(dst, 1, size, file);
|
||||
static bool gguf_fread_el(struct gguf_src * src, void * dst, size_t size, size_t * offset) {
|
||||
size_t n;
|
||||
if (src->file) {
|
||||
n = fread(dst, 1, size, src->file);
|
||||
} else {
|
||||
n = MIN(src->size - *offset, size);
|
||||
memcpy(dst, src->buffer + *offset, n);
|
||||
}
|
||||
*offset += n;
|
||||
return n == size;
|
||||
}
|
||||
|
||||
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
||||
static bool gguf_fread_str(struct gguf_src * src, struct gguf_str * p, size_t * offset) {
|
||||
p->n = 0;
|
||||
p->data = NULL;
|
||||
|
||||
bool ok = true;
|
||||
|
||||
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
|
||||
ok = ok && gguf_fread_el(src, &p->n, sizeof(p->n), offset);
|
||||
|
||||
// early exit if string length is invalid, prevents from integer overflow
|
||||
if (p->n == SIZE_MAX) {
|
||||
|
@ -20842,7 +20855,7 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|||
|
||||
p->data = GGML_CALLOC(p->n + 1, 1);
|
||||
|
||||
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
||||
ok = ok && gguf_fread_el(src, p->data, p->n, offset);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
@ -20893,13 +20906,7 @@ struct gguf_context * gguf_init_empty(void) {
|
|||
return ctx;
|
||||
}
|
||||
|
||||
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
|
||||
FILE * file = ggml_fopen(fname, "rb");
|
||||
if (!file) {
|
||||
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct gguf_context * gguf_init_internal(struct gguf_src * src, struct gguf_init_params params) {
|
||||
// offset from start of file
|
||||
size_t offset = 0;
|
||||
|
||||
|
@ -20907,12 +20914,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
// check the magic before making allocations
|
||||
{
|
||||
gguf_fread_el(file, &magic, sizeof(magic), &offset);
|
||||
gguf_fread_el(src, &magic, sizeof(magic), &offset);
|
||||
|
||||
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
||||
if (magic[i] != GGUF_MAGIC[i]) {
|
||||
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -20930,13 +20937,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
ctx->infos = NULL;
|
||||
ctx->data = NULL;
|
||||
|
||||
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
||||
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
||||
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
||||
ok = ok && gguf_fread_el(src, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
||||
ok = ok && gguf_fread_el(src, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
||||
ok = ok && gguf_fread_el(src, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
||||
|
||||
if (ctx->header.version == 1) {
|
||||
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -20949,7 +20956,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read header\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -20968,28 +20975,28 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
||||
|
||||
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
||||
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
||||
ok = ok && gguf_fread_str(src, &kv->key, &offset);
|
||||
ok = ok && gguf_fread_el (src, &kv->type, sizeof(kv->type), &offset);
|
||||
|
||||
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
||||
|
||||
switch (kv->type) {
|
||||
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
|
||||
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
|
||||
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
|
||||
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
|
||||
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
||||
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
||||
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
||||
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
||||
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
||||
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
|
||||
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (src, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
|
||||
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (src, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
|
||||
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (src, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
|
||||
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (src, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
|
||||
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (src, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
||||
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (src, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
||||
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (src, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
||||
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (src, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
||||
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (src, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
||||
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (src, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (src, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(src, &kv->value.str, &offset); break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
{
|
||||
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
||||
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||
ok = ok && gguf_fread_el(src, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
||||
ok = ok && gguf_fread_el(src, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||
|
||||
switch (kv->value.arr.type) {
|
||||
case GGUF_TYPE_UINT8:
|
||||
|
@ -21007,21 +21014,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
// prevent from integer overflow in the malloc below
|
||||
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
|
||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
|
||||
|
||||
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
||||
ok = ok && gguf_fread_el(src, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
||||
} break;
|
||||
case GGUF_TYPE_STRING:
|
||||
{
|
||||
// prevent from integer overflow in the malloc below
|
||||
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
|
||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21029,7 +21036,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, sizeof(struct gguf_str));
|
||||
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
||||
ok = ok && gguf_fread_str(src, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
|
@ -21048,7 +21055,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21065,17 +21072,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
info->ne[j] = 1;
|
||||
}
|
||||
|
||||
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
||||
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
||||
ok = ok && gguf_fread_str(src, &info->name, &offset);
|
||||
ok = ok && gguf_fread_el (src, &info->n_dims, sizeof(info->n_dims), &offset);
|
||||
|
||||
ok = ok && (info->n_dims <= GGML_MAX_DIMS);
|
||||
|
||||
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
||||
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
||||
ok = ok && gguf_fread_el(src, &info->ne[j], sizeof(info->ne[j]), &offset);
|
||||
}
|
||||
|
||||
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
||||
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
||||
ok = ok && gguf_fread_el (src, &info->type, sizeof(info->type), &offset);
|
||||
ok = ok && gguf_fread_el (src, &info->offset, sizeof(info->offset), &offset);
|
||||
|
||||
// TODO: return an error instead of crashing with GGML_ASSERT
|
||||
gguf_tensor_info_sanitize(info);
|
||||
|
@ -21090,7 +21097,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21110,7 +21117,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
if (offset_pad != 0) {
|
||||
offset += ctx->alignment - offset_pad;
|
||||
fseek(file, offset, SEEK_SET);
|
||||
if (src->file) fseek(src->file, offset, SEEK_SET);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21132,7 +21139,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
|
||||
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
||||
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21164,7 +21171,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
*params.ctx = ggml_init(pdata);
|
||||
if (*params.ctx == NULL) {
|
||||
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -21179,11 +21186,11 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
ok = ok && data != NULL;
|
||||
|
||||
// read the binary blob with the tensor data
|
||||
ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
|
||||
ok = ok && gguf_fread_el(src, data->data, ctx->size, &offset);
|
||||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
|
@ -21222,7 +21229,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
|
@ -21231,11 +21238,29 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
||||
}
|
||||
|
||||
fclose(file);
|
||||
if (src->file) fclose(src->file);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
|
||||
struct gguf_src src = {ggml_fopen(fname, "rb"), NULL, 0};
|
||||
if (!src.file) {
|
||||
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
return gguf_init_internal(&src, params);
|
||||
}
|
||||
|
||||
struct gguf_context * gguf_init_from_buffer(const char * buffer, size_t size, struct gguf_init_params params) {
|
||||
if (!buffer) {
|
||||
fprintf(stderr, "%s: buffer cannot be null\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
struct gguf_src src = {NULL, buffer, size};
|
||||
return gguf_init_internal(&src, params);
|
||||
}
|
||||
|
||||
void gguf_free(struct gguf_context * ctx) {
|
||||
if (ctx == NULL) {
|
||||
return;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue