Merge ad1af06737
into 581c305186
This commit is contained in:
commit
9ecc19ae39
4 changed files with 217 additions and 74 deletions
|
@ -2387,7 +2387,7 @@ extern "C" {
|
||||||
|
|
||||||
GGML_API struct gguf_context * gguf_init_empty(void);
|
GGML_API struct gguf_context * gguf_init_empty(void);
|
||||||
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
||||||
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
GGML_API struct gguf_context * gguf_init_from_buffer(const char * buffer, size_t size, struct gguf_init_params params);
|
||||||
|
|
||||||
GGML_API void gguf_free(struct gguf_context * ctx);
|
GGML_API void gguf_free(struct gguf_context * ctx);
|
||||||
|
|
||||||
|
|
131
ggml/src/ggml.c
131
ggml/src/ggml.c
|
@ -22015,6 +22015,13 @@ struct gguf_context {
|
||||||
void * data;
|
void * data;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Describes where gguf bytes are read from: an open FILE stream, or a block of
// memory given by `buffer` and `size`.
// NOTE: field order matters, this struct is filled with positional initializers.
struct gguf_src {
    FILE * file;

    // used when the gguf data lives in memory rather than in a file
    const char * buffer;
    size_t       size;
};
|
||||||
|
|
||||||
static size_t gguf_type_size(enum gguf_type type) {
|
static size_t gguf_type_size(enum gguf_type type) {
|
||||||
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
|
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
|
||||||
return GGUF_TYPE_SIZE[type];
|
return GGUF_TYPE_SIZE[type];
|
||||||
|
@ -22034,19 +22041,25 @@ static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
|
||||||
GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
|
GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
|
static bool gguf_fread_el(struct gguf_src * src, void * dst, size_t size, size_t * offset) {
|
||||||
const size_t n = fread(dst, 1, size, file);
|
size_t n;
|
||||||
|
if (src->file) {
|
||||||
|
n = fread(dst, 1, size, src->file);
|
||||||
|
} else {
|
||||||
|
n = MIN(src->size - *offset, size);
|
||||||
|
memcpy(dst, src->buffer + *offset, n);
|
||||||
|
}
|
||||||
*offset += n;
|
*offset += n;
|
||||||
return n == size;
|
return n == size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
static bool gguf_fread_str(struct gguf_src * src, struct gguf_str * p, size_t * offset) {
|
||||||
p->n = 0;
|
p->n = 0;
|
||||||
p->data = NULL;
|
p->data = NULL;
|
||||||
|
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
|
ok = ok && gguf_fread_el(src, &p->n, sizeof(p->n), offset);
|
||||||
|
|
||||||
// early exit if string length is invalid, prevents from integer overflow
|
// early exit if string length is invalid, prevents from integer overflow
|
||||||
if (p->n == SIZE_MAX) {
|
if (p->n == SIZE_MAX) {
|
||||||
|
@ -22056,7 +22069,7 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
||||||
|
|
||||||
p->data = GGML_CALLOC(p->n + 1, 1);
|
p->data = GGML_CALLOC(p->n + 1, 1);
|
||||||
|
|
||||||
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
ok = ok && gguf_fread_el(src, p->data, p->n, offset);
|
||||||
|
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
@ -22107,13 +22120,7 @@ struct gguf_context * gguf_init_empty(void) {
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
|
static struct gguf_context * gguf_init_internal(struct gguf_src * src, struct gguf_init_params params) {
|
||||||
FILE * file = ggml_fopen(fname, "rb");
|
|
||||||
if (!file) {
|
|
||||||
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// offset from start of file
|
// offset from start of file
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
|
@ -22121,12 +22128,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
// check the magic before making allocations
|
// check the magic before making allocations
|
||||||
{
|
{
|
||||||
gguf_fread_el(file, &magic, sizeof(magic), &offset);
|
gguf_fread_el(src, &magic, sizeof(magic), &offset);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
||||||
if (magic[i] != GGUF_MAGIC[i]) {
|
if (magic[i] != GGUF_MAGIC[i]) {
|
||||||
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22144,13 +22151,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
ctx->infos = NULL;
|
ctx->infos = NULL;
|
||||||
ctx->data = NULL;
|
ctx->data = NULL;
|
||||||
|
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
ok = ok && gguf_fread_el(src, &ctx->header.version, sizeof(ctx->header.version), &offset);
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
ok = ok && gguf_fread_el(src, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
|
||||||
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
ok = ok && gguf_fread_el(src, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
|
||||||
|
|
||||||
if (ctx->header.version == 1) {
|
if (ctx->header.version == 1) {
|
||||||
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22163,7 +22170,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read header\n", __func__);
|
fprintf(stderr, "%s: failed to read header\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22182,28 +22189,28 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
||||||
|
|
||||||
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
ok = ok && gguf_fread_str(src, &kv->key, &offset);
|
||||||
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
ok = ok && gguf_fread_el (src, &kv->type, sizeof(kv->type), &offset);
|
||||||
|
|
||||||
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
|
||||||
|
|
||||||
switch (kv->type) {
|
switch (kv->type) {
|
||||||
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
|
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (src, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
|
||||||
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
|
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (src, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
|
||||||
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
|
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (src, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
|
||||||
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
|
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (src, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
|
||||||
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (src, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
|
||||||
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (src, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
|
||||||
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (src, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
|
||||||
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (src, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
|
||||||
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (src, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
|
||||||
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (src, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
|
||||||
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (src, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
|
||||||
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
|
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(src, &kv->value.str, &offset); break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
{
|
{
|
||||||
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
ok = ok && gguf_fread_el(src, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
||||||
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
ok = ok && gguf_fread_el(src, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||||
|
|
||||||
switch (kv->value.arr.type) {
|
switch (kv->value.arr.type) {
|
||||||
case GGUF_TYPE_UINT8:
|
case GGUF_TYPE_UINT8:
|
||||||
|
@ -22221,21 +22228,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
// prevent from integer overflow in the malloc below
|
// prevent from integer overflow in the malloc below
|
||||||
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
|
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
|
||||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
|
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
|
||||||
|
|
||||||
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
ok = ok && gguf_fread_el(src, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_STRING:
|
case GGUF_TYPE_STRING:
|
||||||
{
|
{
|
||||||
// prevent from integer overflow in the malloc below
|
// prevent from integer overflow in the malloc below
|
||||||
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
|
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
|
||||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22243,7 +22250,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, sizeof(struct gguf_str));
|
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, sizeof(struct gguf_str));
|
||||||
|
|
||||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||||
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
ok = ok && gguf_fread_str(src, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case GGUF_TYPE_ARRAY:
|
case GGUF_TYPE_ARRAY:
|
||||||
|
@ -22262,7 +22269,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22279,17 +22286,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
info->ne[j] = 1;
|
info->ne[j] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
ok = ok && gguf_fread_str(src, &info->name, &offset);
|
||||||
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
ok = ok && gguf_fread_el (src, &info->n_dims, sizeof(info->n_dims), &offset);
|
||||||
|
|
||||||
ok = ok && (info->n_dims <= GGML_MAX_DIMS);
|
ok = ok && (info->n_dims <= GGML_MAX_DIMS);
|
||||||
|
|
||||||
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
||||||
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
ok = ok && gguf_fread_el(src, &info->ne[j], sizeof(info->ne[j]), &offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
ok = ok && gguf_fread_el (src, &info->type, sizeof(info->type), &offset);
|
||||||
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
ok = ok && gguf_fread_el (src, &info->offset, sizeof(info->offset), &offset);
|
||||||
|
|
||||||
// TODO: return an error instead of crashing with GGML_ASSERT
|
// TODO: return an error instead of crashing with GGML_ASSERT
|
||||||
gguf_tensor_info_sanitize(info);
|
gguf_tensor_info_sanitize(info);
|
||||||
|
@ -22304,7 +22311,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22324,7 +22331,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
if (offset_pad != 0) {
|
if (offset_pad != 0) {
|
||||||
offset += ctx->alignment - offset_pad;
|
offset += ctx->alignment - offset_pad;
|
||||||
fseek(file, offset, SEEK_SET);
|
if (src->file) fseek(src->file, offset, SEEK_SET);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22346,7 +22353,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
|
if (ggml_blck_size(info->type) == 0 || ne % ggml_blck_size(info->type) != 0) {
|
||||||
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
||||||
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
|
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22378,7 +22385,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
*params.ctx = ggml_init(pdata);
|
*params.ctx = ggml_init(pdata);
|
||||||
if (*params.ctx == NULL) {
|
if (*params.ctx == NULL) {
|
||||||
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -22393,11 +22400,11 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
ok = ok && data != NULL;
|
ok = ok && data != NULL;
|
||||||
|
|
||||||
// read the binary blob with the tensor data
|
// read the binary blob with the tensor data
|
||||||
ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
|
ok = ok && gguf_fread_el(src, data->data, ctx->size, &offset);
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
ggml_free(ctx_data);
|
ggml_free(ctx_data);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -22436,7 +22443,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
|
|
||||||
if (!ok) {
|
if (!ok) {
|
||||||
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
ggml_free(ctx_data);
|
ggml_free(ctx_data);
|
||||||
gguf_free(ctx);
|
gguf_free(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -22445,11 +22452,29 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
ggml_set_no_alloc(ctx_data, params.no_alloc);
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(file);
|
if (src->file) fclose(src->file);
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
|
||||||
|
struct gguf_src src = {ggml_fopen(fname, "rb"), NULL, 0};
|
||||||
|
if (!src.file) {
|
||||||
|
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return gguf_init_internal(&src, params);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct gguf_context * gguf_init_from_buffer(const char * buffer, size_t size, struct gguf_init_params params) {
|
||||||
|
if (!buffer) {
|
||||||
|
fprintf(stderr, "%s: buffer cannot be null\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
struct gguf_src src = {NULL, buffer, size};
|
||||||
|
return gguf_init_internal(&src, params);
|
||||||
|
}
|
||||||
|
|
||||||
void gguf_free(struct gguf_context * ctx) {
|
void gguf_free(struct gguf_context * ctx) {
|
||||||
if (ctx == NULL) {
|
if (ctx == NULL) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -416,6 +416,12 @@ extern "C" {
|
||||||
// lora adapter
|
// lora adapter
|
||||||
struct llama_lora_adapter;
|
struct llama_lora_adapter;
|
||||||
|
|
||||||
|
// to be used by llama_load_model_from_buffers
|
||||||
|
struct llama_model_shard_buffer {
|
||||||
|
const char * data;
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
// Helpers for getting default parameters
|
// Helpers for getting default parameters
|
||||||
LLAMA_API struct llama_model_params llama_model_default_params(void);
|
LLAMA_API struct llama_model_params llama_model_default_params(void);
|
||||||
LLAMA_API struct llama_context_params llama_context_default_params(void);
|
LLAMA_API struct llama_context_params llama_context_default_params(void);
|
||||||
|
@ -441,7 +447,12 @@ extern "C" {
|
||||||
|
|
||||||
LLAMA_API struct llama_model * llama_load_model_from_file(
|
LLAMA_API struct llama_model * llama_load_model_from_file(
|
||||||
const char * path_model,
|
const char * path_model,
|
||||||
struct llama_model_params params);
|
struct llama_model_params params);
|
||||||
|
|
||||||
|
LLAMA_API struct llama_model * llama_load_model_from_buffers(
|
||||||
|
struct llama_model_shard_buffer * shards,
|
||||||
|
size_t n_shards,
|
||||||
|
struct llama_model_params params);
|
||||||
|
|
||||||
LLAMA_API void llama_free_model(struct llama_model * model);
|
LLAMA_API void llama_free_model(struct llama_model * model);
|
||||||
|
|
||||||
|
|
145
src/llama.cpp
145
src/llama.cpp
|
@ -1696,9 +1696,14 @@ public:
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// use FILE * so we don't have to re-open the file to mmap
|
// use FILE * so we don't have to re-open the file to mmap
|
||||||
FILE * fp;
|
FILE * fp = nullptr;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
||||||
|
// when a buffer is used instead of a real file, we store the pointer here
|
||||||
|
const char * buffer = nullptr;
|
||||||
|
// curr is used as replacement for tell() when file is loaded from buffer
|
||||||
|
size_t curr = 0;
|
||||||
|
|
||||||
llama_file(const char * fname, const char * mode) {
|
llama_file(const char * fname, const char * mode) {
|
||||||
fp = ggml_fopen(fname, mode);
|
fp = ggml_fopen(fname, mode);
|
||||||
if (fp == NULL) {
|
if (fp == NULL) {
|
||||||
|
@ -1709,7 +1714,12 @@ public:
|
||||||
seek(0, SEEK_SET);
|
seek(0, SEEK_SET);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wrap an in-memory buffer instead of opening a file.
// Only `size` and `buffer` are set here; `fp` and `curr` keep their default
// member initializers (nullptr and 0).
llama_file(const char * buffer, size_t size)
    : size(size)
    , buffer(buffer) {
}
|
||||||
|
|
||||||
size_t tell() const {
|
size_t tell() const {
|
||||||
|
if (buffer) {
|
||||||
|
return curr;
|
||||||
|
}
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
__int64 ret = _ftelli64(fp);
|
__int64 ret = _ftelli64(fp);
|
||||||
#else
|
#else
|
||||||
|
@ -1722,7 +1732,17 @@ public:
|
||||||
return (size_t) ret;
|
return (size_t) ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void seek(size_t offset, int whence) const {
|
void seek(size_t offset, int whence) {
|
||||||
|
if (buffer) {
|
||||||
|
if (whence == SEEK_END) {
|
||||||
|
curr = size;
|
||||||
|
} else if (whence == SEEK_SET) {
|
||||||
|
curr = offset;
|
||||||
|
} else {
|
||||||
|
throw std::runtime_error(format("invalid whence: %d", whence));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
int ret = _fseeki64(fp, (__int64) offset, whence);
|
int ret = _fseeki64(fp, (__int64) offset, whence);
|
||||||
#else
|
#else
|
||||||
|
@ -1737,6 +1757,13 @@ public:
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (buffer) {
|
||||||
|
if (curr + len > size) {
|
||||||
|
throw std::runtime_error("unexpectedly reached end of buffer");
|
||||||
|
}
|
||||||
|
memcpy(ptr, buffer + curr, len);
|
||||||
|
return;
|
||||||
|
}
|
||||||
errno = 0;
|
errno = 0;
|
||||||
std::size_t ret = std::fread(ptr, len, 1, fp);
|
std::size_t ret = std::fread(ptr, len, 1, fp);
|
||||||
if (ferror(fp)) {
|
if (ferror(fp)) {
|
||||||
|
@ -1757,6 +1784,9 @@ public:
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (buffer) {
|
||||||
|
throw std::runtime_error("cannot write to read-only buffer");
|
||||||
|
}
|
||||||
errno = 0;
|
errno = 0;
|
||||||
size_t ret = std::fwrite(ptr, len, 1, fp);
|
size_t ret = std::fwrite(ptr, len, 1, fp);
|
||||||
if (ret != 1) {
|
if (ret != 1) {
|
||||||
|
@ -1777,9 +1807,15 @@ public:
|
||||||
};
|
};
|
||||||
using llama_files = std::vector<std::unique_ptr<llama_file>>;
|
using llama_files = std::vector<std::unique_ptr<llama_file>>;
|
||||||
|
|
||||||
|
struct llama_shard_src {
|
||||||
|
std::string fname;
|
||||||
|
std::vector<llama_model_shard_buffer *> buffers;
|
||||||
|
};
|
||||||
|
|
||||||
struct llama_mmap {
|
struct llama_mmap {
|
||||||
void * addr;
|
void * addr;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
bool file_is_buffer = false;
|
||||||
|
|
||||||
llama_mmap(const llama_mmap &) = delete;
|
llama_mmap(const llama_mmap &) = delete;
|
||||||
|
|
||||||
|
@ -1790,6 +1826,11 @@ struct llama_mmap {
|
||||||
std::vector<std::pair<size_t, size_t>> mapped_fragments;
|
std::vector<std::pair<size_t, size_t>> mapped_fragments;
|
||||||
|
|
||||||
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
|
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
|
||||||
|
if (file->buffer) {
|
||||||
|
// in-memory buffer doesn't need to be mapped
|
||||||
|
file_is_buffer = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
size = file->size;
|
size = file->size;
|
||||||
int fd = fileno(file->fp);
|
int fd = fileno(file->fp);
|
||||||
int flags = MAP_SHARED;
|
int flags = MAP_SHARED;
|
||||||
|
@ -1844,6 +1885,10 @@ struct llama_mmap {
|
||||||
|
|
||||||
// partially unmap the file in the range [first, last)
|
// partially unmap the file in the range [first, last)
|
||||||
void unmap_fragment(size_t first, size_t last) {
|
void unmap_fragment(size_t first, size_t last) {
|
||||||
|
if (file_is_buffer) {
|
||||||
|
// in-memory buffer doesn't need to be unmapped
|
||||||
|
return;
|
||||||
|
}
|
||||||
// note: this function must not be called multiple times with overlapping ranges
|
// note: this function must not be called multiple times with overlapping ranges
|
||||||
// otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
|
// otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
|
||||||
int page_size = sysconf(_SC_PAGESIZE);
|
int page_size = sysconf(_SC_PAGESIZE);
|
||||||
|
@ -1889,6 +1934,9 @@ struct llama_mmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
|
if (file_is_buffer) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
for (const auto & frag : mapped_fragments) {
|
for (const auto & frag : mapped_fragments) {
|
||||||
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
|
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
|
||||||
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
|
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
|
||||||
|
@ -1901,6 +1949,12 @@ struct llama_mmap {
|
||||||
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
|
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
|
||||||
GGML_UNUSED(numa);
|
GGML_UNUSED(numa);
|
||||||
|
|
||||||
|
if (file->buffer) {
|
||||||
|
// in-memory buffer doesn't need to be mapped
|
||||||
|
file_is_buffer = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
size = file->size;
|
size = file->size;
|
||||||
|
|
||||||
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
|
||||||
|
@ -1952,6 +2006,9 @@ struct llama_mmap {
|
||||||
}
|
}
|
||||||
|
|
||||||
~llama_mmap() {
|
~llama_mmap() {
|
||||||
|
if (file->buffer) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (!UnmapViewOfFile(addr)) {
|
if (!UnmapViewOfFile(addr)) {
|
||||||
LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
|
LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
|
||||||
llama_format_win_err(GetLastError()).c_str());
|
llama_format_win_err(GetLastError()).c_str());
|
||||||
|
@ -4292,7 +4349,7 @@ struct llama_model_loader {
|
||||||
std::string arch_name;
|
std::string arch_name;
|
||||||
LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
|
LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
|
||||||
|
|
||||||
llama_model_loader(const std::string & fname, bool use_mmap, bool check_tensors, const struct llama_model_kv_override * param_overrides_p) {
|
llama_model_loader(const llama_shard_src & src, bool use_mmap, bool check_tensors, const struct llama_model_kv_override * param_overrides_p) {
|
||||||
int trace = 0;
|
int trace = 0;
|
||||||
if (getenv("LLAMA_TRACE")) {
|
if (getenv("LLAMA_TRACE")) {
|
||||||
trace = atoi(getenv("LLAMA_TRACE"));
|
trace = atoi(getenv("LLAMA_TRACE"));
|
||||||
|
@ -4304,21 +4361,32 @@ struct llama_model_loader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool file_is_buffer = src.fname.empty();
|
||||||
struct ggml_context * ctx = NULL;
|
struct ggml_context * ctx = NULL;
|
||||||
struct gguf_init_params params = {
|
struct gguf_init_params params = {
|
||||||
/*.no_alloc = */ true,
|
/*.no_alloc = */ true,
|
||||||
/*.ctx = */ &ctx,
|
/*.ctx = */ &ctx,
|
||||||
};
|
};
|
||||||
|
|
||||||
meta = gguf_init_from_file(fname.c_str(), params);
|
if (file_is_buffer) {
|
||||||
|
if (src.buffers.empty()) {
|
||||||
|
throw std::runtime_error("list of shard buffers must not be empty");
|
||||||
|
}
|
||||||
|
meta = gguf_init_from_buffer(src.buffers[0]->data, src.buffers[0]->size, params);
|
||||||
|
} else {
|
||||||
|
meta = gguf_init_from_file(src.fname.c_str(), params);
|
||||||
|
}
|
||||||
if (!meta) {
|
if (!meta) {
|
||||||
throw std::runtime_error(format("%s: failed to load model from %s\n", __func__, fname.c_str()));
|
throw std::runtime_error(format(
|
||||||
|
"%s: failed to load model from %s\n", __func__, file_is_buffer ? "buffer" : src.fname.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
|
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
|
||||||
llm_kv = LLM_KV(llm_arch_from_string(arch_name));
|
llm_kv = LLM_KV(llm_arch_from_string(arch_name));
|
||||||
|
|
||||||
files.emplace_back(new llama_file(fname.c_str(), "rb"));
|
files.emplace_back(file_is_buffer
|
||||||
|
? new llama_file(src.buffers[0]->data, src.buffers[0]->size)
|
||||||
|
: new llama_file(src.fname.c_str(), "rb"));
|
||||||
contexts.emplace_back(ctx);
|
contexts.emplace_back(ctx);
|
||||||
|
|
||||||
// Save tensors data offset of the main file.
|
// Save tensors data offset of the main file.
|
||||||
|
@ -4338,9 +4406,16 @@ struct llama_model_loader {
|
||||||
throw std::runtime_error(format("illegal split file: %d, model must be loaded with the first split", idx));
|
throw std::runtime_error(format("illegal split file: %d, model must be loaded with the first split", idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (n_split < src.buffers.size()) {
|
||||||
|
throw std::runtime_error(format("expecting %d buffers, but only have %d", n_split, (int) src.buffers.size()));
|
||||||
|
}
|
||||||
|
|
||||||
char split_prefix[PATH_MAX] = {0};
|
char split_prefix[PATH_MAX] = {0};
|
||||||
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), fname.c_str(), idx, n_split)) {
|
if (!file_is_buffer) {
|
||||||
throw std::runtime_error(format("invalid split file: %s", fname.c_str()));
|
int ret = llama_split_prefix(split_prefix, sizeof(split_prefix), src.fname.c_str(), idx, n_split);
|
||||||
|
if (!ret) {
|
||||||
|
throw std::runtime_error(format("invalid split file: %s", src.fname.c_str()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trace > 0) {
|
if (trace > 0) {
|
||||||
|
@ -4349,18 +4424,28 @@ struct llama_model_loader {
|
||||||
|
|
||||||
char split_path[PATH_MAX] = {0};
|
char split_path[PATH_MAX] = {0};
|
||||||
for (idx = 1; idx < n_split; idx++) {
|
for (idx = 1; idx < n_split; idx++) {
|
||||||
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
|
if (file_is_buffer) {
|
||||||
|
if (idx >= src.buffers.size()) {
|
||||||
|
throw std::runtime_error(format("missing buffer for shard number %d", idx+1));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
|
||||||
|
}
|
||||||
|
|
||||||
struct gguf_init_params split_params = {
|
struct gguf_init_params split_params = {
|
||||||
/*.no_alloc = */ true,
|
/*.no_alloc = */ true,
|
||||||
/*.ctx = */ &ctx,
|
/*.ctx = */ &ctx,
|
||||||
};
|
};
|
||||||
struct gguf_context * ctx_gguf = gguf_init_from_file(split_path, split_params);
|
struct gguf_context * ctx_gguf = file_is_buffer
|
||||||
|
? gguf_init_from_buffer(src.buffers[idx]->data, src.buffers[idx]->size, split_params)
|
||||||
|
: gguf_init_from_file(split_path, split_params);
|
||||||
if (!ctx_gguf) {
|
if (!ctx_gguf) {
|
||||||
throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
|
throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
|
||||||
}
|
}
|
||||||
|
|
||||||
files.emplace_back(new llama_file(split_path, "rb"));
|
files.emplace_back(file_is_buffer
|
||||||
|
? new llama_file(src.buffers[idx]->data, src.buffers[idx]->size)
|
||||||
|
: new llama_file(split_path, "rb"));
|
||||||
contexts.emplace_back(ctx);
|
contexts.emplace_back(ctx);
|
||||||
|
|
||||||
// Save tensors data offset info of the shard.
|
// Save tensors data offset info of the shard.
|
||||||
|
@ -4403,7 +4488,7 @@ struct llama_model_loader {
|
||||||
}
|
}
|
||||||
|
|
||||||
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
|
||||||
__func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(fver));
|
__func__, n_kv, n_tensors, file_is_buffer ? "buffer" : src.fname.c_str(), llama_file_version_name(fver));
|
||||||
|
|
||||||
// determine file type based on the number of tensors for each quantization and print meta data
|
// determine file type based on the number of tensors for each quantization and print meta data
|
||||||
// TODO: make optional
|
// TODO: make optional
|
||||||
|
@ -8584,9 +8669,9 @@ static bool llm_load_tensors(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
|
// Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
|
||||||
static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
|
static int llama_model_load_internal(const llama_shard_src & src, llama_model & model, llama_model_params & params) {
|
||||||
try {
|
try {
|
||||||
llama_model_loader ml(fname, params.use_mmap, params.check_tensors, params.kv_overrides);
|
llama_model_loader ml(src, params.use_mmap, params.check_tensors, params.kv_overrides);
|
||||||
|
|
||||||
model.hparams.vocab_only = params.vocab_only;
|
model.hparams.vocab_only = params.vocab_only;
|
||||||
|
|
||||||
|
@ -17274,7 +17359,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
auto v = (std::vector<llama_model_kv_override>*)params->kv_overrides;
|
auto v = (std::vector<llama_model_kv_override>*)params->kv_overrides;
|
||||||
kv_overrides = v->data();
|
kv_overrides = v->data();
|
||||||
}
|
}
|
||||||
llama_model_loader ml(fname_inp, use_mmap, /*check_tensors*/ true, kv_overrides);
|
llama_shard_src src;
|
||||||
|
src.fname = fname_inp;
|
||||||
|
llama_model_loader ml(src, use_mmap, /*check_tensors*/ true, kv_overrides);
|
||||||
ml.init_mappings(false); // no prefetching
|
ml.init_mappings(false); // no prefetching
|
||||||
|
|
||||||
llama_model model;
|
llama_model model;
|
||||||
|
@ -17996,9 +18083,9 @@ int64_t llama_time_us(void) {
|
||||||
return ggml_time_us();
|
return ggml_time_us();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct llama_model * llama_load_model_from_file(
|
static struct llama_model * llama_load_model_with_params(
|
||||||
const char * path_model,
|
const llama_shard_src & src,
|
||||||
struct llama_model_params params) {
|
struct llama_model_params params) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
llama_model * model = new llama_model;
|
llama_model * model = new llama_model;
|
||||||
|
@ -18030,7 +18117,7 @@ struct llama_model * llama_load_model_from_file(
|
||||||
}
|
}
|
||||||
model->rpc_servers.push_back(servers);
|
model->rpc_servers.push_back(servers);
|
||||||
}
|
}
|
||||||
int status = llama_model_load(path_model, *model, params);
|
int status = llama_model_load_internal(src, *model, params);
|
||||||
GGML_ASSERT(status <= 0);
|
GGML_ASSERT(status <= 0);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
if (status == -1) {
|
if (status == -1) {
|
||||||
|
@ -18045,6 +18132,26 @@ struct llama_model * llama_load_model_from_file(
|
||||||
return model;
|
return model;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct llama_model * llama_load_model_from_file(
|
||||||
|
const char * path_model,
|
||||||
|
struct llama_model_params params) {
|
||||||
|
llama_shard_src src;
|
||||||
|
src.fname = path_model;
|
||||||
|
return llama_load_model_with_params(src, params);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct llama_model * llama_load_model_from_buffers(
|
||||||
|
struct llama_model_shard_buffer * shards,
|
||||||
|
size_t n_shards,
|
||||||
|
struct llama_model_params params) {
|
||||||
|
llama_shard_src src;
|
||||||
|
src.buffers.reserve(n_shards);
|
||||||
|
for (size_t i = 0; i < n_shards; i++) {
|
||||||
|
src.buffers[i] = &shards[i];
|
||||||
|
}
|
||||||
|
return llama_load_model_with_params(src, params);
|
||||||
|
}
|
||||||
|
|
||||||
void llama_free_model(struct llama_model * model) {
|
void llama_free_model(struct llama_model * model) {
|
||||||
delete model;
|
delete model;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue