Merge branch 'master' into gg/flash-attn
This commit is contained in:
commit
3d03bcb7af
68 changed files with 6348 additions and 435 deletions
189
ggml.c
189
ggml.c
|
@ -218,6 +218,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
|||
break;
|
||||
}
|
||||
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
|
||||
GGML_ASSERT(false);
|
||||
return NULL;
|
||||
}
|
||||
return aligned_memory;
|
||||
|
@ -230,6 +231,38 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
inline static void * ggml_malloc(size_t size) {
|
||||
if (size == 0) {
|
||||
GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
|
||||
return NULL;
|
||||
}
|
||||
void * result = malloc(size);
|
||||
if (result == NULL) {
|
||||
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// calloc
|
||||
inline static void * ggml_calloc(size_t num, size_t size) {
|
||||
if (num == 0 || size == 0) {
|
||||
GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
|
||||
return NULL;
|
||||
}
|
||||
void * result = calloc(num, size);
|
||||
if (result == NULL) {
|
||||
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#define GGML_MALLOC(size) ggml_malloc(size)
|
||||
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
|
||||
|
||||
#define GGML_FREE(ptr) free(ptr)
|
||||
|
||||
#define UNUSED GGML_UNUSED
|
||||
#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
|
||||
|
||||
|
@ -599,6 +632,17 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|||
.vec_dot = ggml_vec_dot_iq2_xs_q8_K,
|
||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||
},
|
||||
[GGML_TYPE_IQ3_XXS] = {
|
||||
.type_name = "iq3_xxs",
|
||||
.blck_size = QK_K,
|
||||
.type_size = sizeof(block_iq3_xxs),
|
||||
.is_quantized = true,
|
||||
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
|
||||
.from_float = quantize_row_iq3_xxs,
|
||||
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
|
||||
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
|
||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||
},
|
||||
[GGML_TYPE_Q8_K] = {
|
||||
.type_name = "q8_K",
|
||||
.blck_size = QK_K,
|
||||
|
@ -2206,6 +2250,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|||
case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
|
||||
case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
|
||||
case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break;
|
||||
case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
|
||||
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
|
||||
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
|
||||
}
|
||||
|
@ -7652,6 +7697,7 @@ static void ggml_compute_forward_add(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
{
|
||||
ggml_compute_forward_add_q_f32(params, src0, src1, dst);
|
||||
} break;
|
||||
|
@ -7918,6 +7964,7 @@ static void ggml_compute_forward_add1(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
{
|
||||
ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
|
||||
} break;
|
||||
|
@ -8037,6 +8084,7 @@ static void ggml_compute_forward_acc(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
|
@ -10788,6 +10836,7 @@ static void ggml_compute_forward_out_prod(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
{
|
||||
ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
|
||||
} break;
|
||||
|
@ -10967,6 +11016,7 @@ static void ggml_compute_forward_set(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
|
@ -11163,6 +11213,7 @@ static void ggml_compute_forward_get_rows(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
{
|
||||
ggml_compute_forward_get_rows_q(params, src0, src1, dst);
|
||||
} break;
|
||||
|
@ -11810,6 +11861,7 @@ static void ggml_compute_forward_alibi(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
case GGML_TYPE_Q8_K:
|
||||
case GGML_TYPE_I8:
|
||||
case GGML_TYPE_I16:
|
||||
|
@ -11886,6 +11938,7 @@ static void ggml_compute_forward_clamp(
|
|||
case GGML_TYPE_Q6_K:
|
||||
case GGML_TYPE_IQ2_XXS:
|
||||
case GGML_TYPE_IQ2_XS:
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
case GGML_TYPE_Q8_K:
|
||||
case GGML_TYPE_I8:
|
||||
case GGML_TYPE_I16:
|
||||
|
@ -15439,13 +15492,13 @@ struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
|||
size = ggml_hash_size(size);
|
||||
struct ggml_hash_set result;
|
||||
result.size = size;
|
||||
result.keys = malloc(sizeof(struct ggml_tensor *) * size);
|
||||
result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
|
||||
memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
|
||||
free(hash_set.keys);
|
||||
GGML_FREE(hash_set.keys);
|
||||
}
|
||||
|
||||
struct hash_map {
|
||||
|
@ -15454,17 +15507,17 @@ struct hash_map {
|
|||
};
|
||||
|
||||
static struct hash_map * ggml_new_hash_map(size_t size) {
|
||||
struct hash_map * result = malloc(sizeof(struct hash_map));
|
||||
struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
|
||||
result->set = ggml_hash_set_new(size);
|
||||
result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
|
||||
result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
|
||||
memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void ggml_hash_map_free(struct hash_map * map) {
|
||||
ggml_hash_set_free(map->set);
|
||||
free(map->vals);
|
||||
free(map);
|
||||
GGML_FREE(map->vals);
|
||||
GGML_FREE(map);
|
||||
}
|
||||
|
||||
// gradient checkpointing
|
||||
|
@ -19145,6 +19198,7 @@ void ggml_quantize_init(enum ggml_type type) {
|
|||
switch (type) {
|
||||
case GGML_TYPE_IQ2_XXS: iq2xs_init_impl(256); break;
|
||||
case GGML_TYPE_IQ2_XS: iq2xs_init_impl(512); break;
|
||||
case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
|
||||
default: // nothing
|
||||
break;
|
||||
}
|
||||
|
@ -19407,6 +19461,15 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|||
result = quantize_iq2_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
||||
GGML_ASSERT(result == row_size * nrows);
|
||||
} break;
|
||||
case GGML_TYPE_IQ3_XXS:
|
||||
{
|
||||
GGML_ASSERT(start % QK_K == 0);
|
||||
GGML_ASSERT(start % n_per_row == 0);
|
||||
size_t start_row = start / n_per_row;
|
||||
size_t row_size = ggml_row_size(type, n_per_row);
|
||||
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
||||
GGML_ASSERT(result == row_size * nrows);
|
||||
} break;
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
size_t elemsize = sizeof(ggml_fp16_t);
|
||||
|
@ -19533,6 +19596,25 @@ struct gguf_context {
|
|||
void * data;
|
||||
};
|
||||
|
||||
static size_t gguf_type_size(enum gguf_type type) {
|
||||
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
|
||||
return GGUF_TYPE_SIZE[type];
|
||||
}
|
||||
|
||||
static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
|
||||
GGML_ASSERT(info->n_dims <= GGML_MAX_DIMS);
|
||||
GGML_ASSERT(0 <= info->type && info->type < GGML_TYPE_COUNT);
|
||||
|
||||
for (uint32_t i = 0; i < info->n_dims; ++i) {
|
||||
GGML_ASSERT(info->ne[i] > 0);
|
||||
}
|
||||
|
||||
// prevent overflow for total number of elements
|
||||
GGML_ASSERT(INT64_MAX/info->ne[1] > info->ne[0]);
|
||||
GGML_ASSERT(INT64_MAX/info->ne[2] > info->ne[0]*info->ne[1]);
|
||||
GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
|
||||
}
|
||||
|
||||
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
|
||||
const size_t n = fread(dst, 1, size, file);
|
||||
*offset += n;
|
||||
|
@ -19545,8 +19627,17 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|||
|
||||
bool ok = true;
|
||||
|
||||
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
|
||||
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
||||
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
|
||||
|
||||
// early exit if string length is invalid, prevents from integer overflow
|
||||
if (p->n == SIZE_MAX) {
|
||||
fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
|
||||
return false;
|
||||
}
|
||||
|
||||
p->data = GGML_CALLOC(p->n + 1, 1);
|
||||
|
||||
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
|
||||
|
||||
return ok;
|
||||
}
|
||||
|
@ -19618,6 +19709,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// sanity-checks to prevent from integer/buffer overflows
|
||||
|
||||
ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
|
||||
ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
|
||||
ok = ok && (ctx->header.n_kv < (SIZE_MAX/2)/sizeof(struct gguf_kv));
|
||||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read header\n", __func__);
|
||||
fclose(file);
|
||||
|
@ -19628,7 +19725,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
// read the kv pairs
|
||||
{
|
||||
ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
|
||||
ctx->kv = GGML_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
||||
|
||||
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||
struct gguf_kv * kv = &ctx->kv[i];
|
||||
|
@ -19656,7 +19753,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
case GGUF_TYPE_ARRAY:
|
||||
{
|
||||
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
|
||||
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
|
||||
|
||||
switch (kv->value.arr.type) {
|
||||
case GGUF_TYPE_UINT8:
|
||||
|
@ -19671,21 +19768,39 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
case GGUF_TYPE_FLOAT64:
|
||||
case GGUF_TYPE_BOOL:
|
||||
{
|
||||
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
|
||||
// prevent from integer overflow in the malloc below
|
||||
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
|
||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||
fclose(file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * gguf_type_size(kv->value.arr.type));
|
||||
|
||||
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
||||
} break;
|
||||
case GGUF_TYPE_STRING:
|
||||
{
|
||||
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
|
||||
// prevent from integer overflow in the malloc below
|
||||
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
|
||||
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
||||
fclose(file);
|
||||
gguf_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * sizeof(struct gguf_str));
|
||||
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||
default: GGML_ASSERT(false && "invalid type"); break;
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
default: GGML_ASSERT(false && "invalid type");
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
|
@ -19703,7 +19818,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
// read the tensor infos
|
||||
{
|
||||
ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
||||
ctx->infos = GGML_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
||||
|
||||
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
||||
struct gguf_tensor_info * info = &ctx->infos[i];
|
||||
|
@ -19714,12 +19829,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
ok = ok && gguf_fread_str(file, &info->name, &offset);
|
||||
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
|
||||
|
||||
ok = ok && (info->n_dims <= GGML_MAX_DIMS);
|
||||
|
||||
for (uint32_t j = 0; j < info->n_dims; ++j) {
|
||||
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
|
||||
}
|
||||
|
||||
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
||||
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
||||
|
||||
gguf_tensor_info_sanitize(info);
|
||||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
||||
fclose(file);
|
||||
|
@ -19873,12 +19994,12 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
struct gguf_kv * kv = &ctx->kv[i];
|
||||
|
||||
if (kv->key.data) {
|
||||
free(kv->key.data);
|
||||
GGML_FREE(kv->key.data);
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_STRING) {
|
||||
if (kv->value.str.data) {
|
||||
free(kv->value.str.data);
|
||||
GGML_FREE(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19888,16 +20009,16 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
free(str->data);
|
||||
GGML_FREE(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
free(kv->value.arr.data);
|
||||
GGML_FREE(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(ctx->kv);
|
||||
GGML_FREE(ctx->kv);
|
||||
}
|
||||
|
||||
if (ctx->infos) {
|
||||
|
@ -19905,11 +20026,11 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
struct gguf_tensor_info * info = &ctx->infos[i];
|
||||
|
||||
if (info->name.data) {
|
||||
free(info->name.data);
|
||||
GGML_FREE(info->name.data);
|
||||
}
|
||||
}
|
||||
|
||||
free(ctx->infos);
|
||||
GGML_FREE(ctx->infos);
|
||||
}
|
||||
|
||||
GGML_ALIGNED_FREE(ctx);
|
||||
|
@ -20210,8 +20331,8 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
|
|||
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
||||
ctx->kv[idx].value.arr.type = type;
|
||||
ctx->kv[idx].value.arr.n = n;
|
||||
ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
|
||||
memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
|
||||
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
|
||||
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
|
||||
}
|
||||
|
||||
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
|
||||
|
@ -20220,7 +20341,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
|
|||
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
||||
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
|
||||
ctx->kv[idx].value.arr.n = n;
|
||||
ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
|
||||
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
|
||||
for (int i = 0; i < n; i++) {
|
||||
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
|
||||
str->n = strlen(data[i]);
|
||||
|
@ -20247,19 +20368,19 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|||
case GGUF_TYPE_ARRAY:
|
||||
{
|
||||
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
|
||||
const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
|
||||
const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
|
||||
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
|
||||
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
|
||||
}
|
||||
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
|
||||
free((void *)data);
|
||||
GGML_FREE((void *)data);
|
||||
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
|
||||
GGML_ASSERT(false && "nested arrays not supported");
|
||||
} else {
|
||||
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||
default: GGML_ASSERT(false && "invalid type"); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20335,7 +20456,7 @@ struct gguf_buf {
|
|||
|
||||
static struct gguf_buf gguf_buf_init(size_t size) {
|
||||
struct gguf_buf buf = {
|
||||
/*buf.data =*/ size == 0 ? NULL : malloc(size),
|
||||
/*buf.data =*/ size == 0 ? NULL : GGML_MALLOC(size),
|
||||
/*buf.size =*/ size,
|
||||
/*buf.offset =*/ 0,
|
||||
};
|
||||
|
@ -20345,7 +20466,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
|
|||
|
||||
static void gguf_buf_free(struct gguf_buf buf) {
|
||||
if (buf.data) {
|
||||
free(buf.data);
|
||||
GGML_FREE(buf.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20426,7 +20547,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
|||
case GGUF_TYPE_FLOAT64:
|
||||
case GGUF_TYPE_BOOL:
|
||||
{
|
||||
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
|
||||
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
|
||||
} break;
|
||||
case GGUF_TYPE_STRING:
|
||||
{
|
||||
|
@ -20435,10 +20556,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
|
|||
}
|
||||
} break;
|
||||
case GGUF_TYPE_ARRAY:
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
|
||||
default: GGML_ASSERT(false && "invalid type"); break;
|
||||
}
|
||||
} break;
|
||||
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
|
||||
default: GGML_ASSERT(false && "invalid type");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue