Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
commit
c460ff1a1c
172 changed files with 11845 additions and 11211 deletions
295
ggml.c
295
ggml.c
|
@ -4,6 +4,7 @@
|
|||
#include "ggml-impl.h"
|
||||
#include "ggml-quants.h"
|
||||
#include "ggml.h"
|
||||
#include "sgemm.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||
|
@ -32,6 +33,10 @@
|
|||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_FEATURE_MATMUL_INT8
|
||||
#undef GGML_USE_LLAMAFILE
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// disable "possible loss of data" to avoid hundreds of casts
|
||||
// we should just be careful :)
|
||||
|
@ -853,18 +858,6 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||
// simd mappings
|
||||
//
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
#if !defined(__aarch64__)
|
||||
|
||||
// 64-bit compatibility
|
||||
|
||||
inline static float vaddvq_f32(float32x4_t v) {
|
||||
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// we define a common set of C macros which map to specific intrinsics based on the current architecture
|
||||
// we then implement the fundamental computation operations below using only these macros
|
||||
// adding support for new architectures requires to define the corresponding SIMD macros
|
||||
|
@ -4573,21 +4566,32 @@ void ggml_mul_mat_set_prec(
|
|||
|
||||
// ggml_mul_mat_id
|
||||
|
||||
// NOTE: id will be removed in the future and instead all the experts listed in ids will be computed
|
||||
// this will allow computing all the used experts in a single matrix multiplication
|
||||
/*
|
||||
c = ggml_mul_mat_id(ctx, as, b, ids);
|
||||
|
||||
as -> [cols, rows, n_expert]
|
||||
ids -> [n_experts_used, n_tokens] (i32)
|
||||
b -> [cols, n_expert_used, n_tokens]
|
||||
c -> [cols, n_expert_used, n_tokens]
|
||||
|
||||
in b, n_experts_used can be broadcasted to match the n_expert_used of ids
|
||||
|
||||
c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
|
||||
*/
|
||||
struct ggml_tensor * ggml_mul_mat_id(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * as,
|
||||
struct ggml_tensor * ids,
|
||||
int id,
|
||||
struct ggml_tensor * b) {
|
||||
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * ids) {
|
||||
GGML_ASSERT(!ggml_is_transposed(as));
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
|
||||
GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
|
||||
GGML_ASSERT(b->ne[3] == 1); // b is 3d
|
||||
GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
|
||||
GGML_ASSERT(ids->ne[1] == b->ne[1]); // must have an expert per b row
|
||||
GGML_ASSERT(ids->ne[2] == b->ne[2] && ids->ne[3] == b->ne[3]);
|
||||
GGML_ASSERT(id >= 0 && id < ids->ne[0]); // valid id
|
||||
GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
|
||||
GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
|
||||
GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
|
||||
|
||||
bool is_node = false;
|
||||
|
||||
|
@ -4595,11 +4599,9 @@ struct ggml_tensor * ggml_mul_mat_id(
|
|||
is_node = true;
|
||||
}
|
||||
|
||||
const int64_t ne[4] = { as->ne[1], b->ne[1], b->ne[2], b->ne[3] };
|
||||
const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
|
||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||
|
||||
ggml_set_op_params_i32(result, 0, id);
|
||||
|
||||
result->op = GGML_OP_MUL_MAT_ID;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src[0] = as;
|
||||
|
@ -10809,6 +10811,28 @@ static void ggml_compute_forward_mul_mat(
|
|||
}
|
||||
#endif
|
||||
|
||||
#if GGML_USE_LLAMAFILE
|
||||
if (src1_cont) {
|
||||
for (int64_t i13 = 0; i13 < ne13; i13++)
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++)
|
||||
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
|
||||
nb01/ggml_type_size(src0->type),
|
||||
(const char *)src1->data + i12*nb12 + i13*nb13,
|
||||
nb11/ggml_type_size(src1->type),
|
||||
(char *)dst->data + i12*nb2 + i13*nb3,
|
||||
nb1/ggml_type_size(dst->type),
|
||||
ith, nth,
|
||||
params->type,
|
||||
src0->type,
|
||||
src1->type,
|
||||
dst->type))
|
||||
goto UseGgmlGemm1;
|
||||
return;
|
||||
}
|
||||
UseGgmlGemm1:;
|
||||
#endif
|
||||
|
||||
if (params->type == GGML_TASK_TYPE_INIT) {
|
||||
if (ith != 0) {
|
||||
return;
|
||||
|
@ -10840,6 +10864,28 @@ static void ggml_compute_forward_mul_mat(
|
|||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
|
||||
|
||||
#if GGML_USE_LLAMAFILE
|
||||
if (src1->type != vec_dot_type) {
|
||||
for (int64_t i13 = 0; i13 < ne13; i13++)
|
||||
for (int64_t i12 = 0; i12 < ne12; i12++)
|
||||
if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
|
||||
(const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
|
||||
nb01/ggml_type_size(src0->type),
|
||||
(const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size,
|
||||
row_size/ggml_type_size(vec_dot_type),
|
||||
(char *)dst->data + i12*nb2 + i13*nb3,
|
||||
nb1/ggml_type_size(dst->type),
|
||||
ith, nth,
|
||||
params->type,
|
||||
src0->type,
|
||||
vec_dot_type,
|
||||
dst->type))
|
||||
goto UseGgmlGemm2;
|
||||
return;
|
||||
}
|
||||
UseGgmlGemm2:;
|
||||
#endif
|
||||
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = ne1*ne12*ne13; // src1 rows
|
||||
|
||||
|
@ -10957,11 +11003,6 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
|
||||
ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
|
||||
|
||||
GGML_ASSERT(ne0 == ne01);
|
||||
GGML_ASSERT(ne1 == ne11);
|
||||
GGML_ASSERT(ne2 == ne12);
|
||||
GGML_ASSERT(ne3 == ne13);
|
||||
|
||||
// we don't support permuted src0 or src1
|
||||
GGML_ASSERT(nb00 == ggml_type_size(type));
|
||||
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
|
||||
|
@ -10972,22 +11013,21 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
GGML_ASSERT(nb1 <= nb2);
|
||||
GGML_ASSERT(nb2 <= nb3);
|
||||
|
||||
// broadcast is not supported with mmid
|
||||
assert(ne12 == 1);
|
||||
assert(ne13 == 1);
|
||||
|
||||
// row groups
|
||||
const int id = ggml_get_op_params_i32(dst, 0);
|
||||
const int n_as = src0->ne[2];
|
||||
const int n_ids = ids->ne[0]; // n_expert_used
|
||||
const int n_as = ne02; // n_expert
|
||||
|
||||
char * wdata_src1_end = (src1->type == vec_dot_type) ?
|
||||
(char *) params->wdata :
|
||||
(char *) params->wdata + GGML_PAD(ggml_row_size(vec_dot_type, ggml_nelements(src1)), sizeof(int64_t));
|
||||
|
||||
int64_t * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as]
|
||||
int64_t * matrix_rows = matrix_row_counts + n_as; // [n_as][ne11]
|
||||
struct mmid_row_mapping {
|
||||
int32_t i1;
|
||||
int32_t i2;
|
||||
};
|
||||
|
||||
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
|
||||
int64_t * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as]
|
||||
struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *)(matrix_row_counts + n_as); // [n_as][ne11]
|
||||
|
||||
if (params->type == GGML_TASK_TYPE_INIT) {
|
||||
if (ith != 0) {
|
||||
|
@ -11011,16 +11051,20 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
}
|
||||
|
||||
// initialize matrix_row_counts
|
||||
GGML_ASSERT(wdata == wdata_src1_end);
|
||||
memset(matrix_row_counts, 0, n_as*sizeof(int64_t));
|
||||
|
||||
// group rows by src0 matrix
|
||||
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
||||
const int32_t row_id = *(const int32_t *) ((const char *) ids->data + i01*ids->nb[1] + id*ids->nb[0]);
|
||||
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne12 + (i1)]
|
||||
|
||||
GGML_ASSERT(row_id >= 0 && row_id < n_as);
|
||||
MMID_MATRIX_ROW(row_id, matrix_row_counts[row_id]) = i01;
|
||||
matrix_row_counts[row_id] += 1;
|
||||
// group rows by src0 matrix
|
||||
for (int64_t iid1 = 0; iid1 < ids->ne[1]; ++iid1) {
|
||||
for (int id = 0; id < n_ids; ++id) {
|
||||
const int32_t i02 = *(const int32_t *) ((const char *) ids->data + iid1*ids->nb[1] + id*ids->nb[0]);
|
||||
|
||||
assert(i02 >= 0 && i02 < n_as);
|
||||
|
||||
MMID_MATRIX_ROW(i02, matrix_row_counts[i02]) = (struct mmid_row_mapping) {id, iid1};
|
||||
matrix_row_counts[i02] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
|
@ -11038,15 +11082,13 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
continue;
|
||||
}
|
||||
|
||||
size_t src0_offset = cur_a*src0->nb[2];
|
||||
const char * src0_cur = (const char *) src0->data + cur_a*nb02;
|
||||
|
||||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
|
||||
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = cne1*ne12*ne13; // src1 rows
|
||||
|
||||
//printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = cne1; // src1 rows
|
||||
|
||||
// distribute the thread work across the inner or outer loop based on which one is larger
|
||||
|
||||
|
@ -11065,13 +11107,11 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
const int64_t ir110 = dr1*ith1;
|
||||
const int64_t ir111 = MIN(ir110 + dr1, nr1);
|
||||
|
||||
//printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
|
||||
|
||||
// threads with no work simply yield (not sure if it helps)
|
||||
if (ir010 >= ir011 || ir110 >= ir111) {
|
||||
sched_yield();
|
||||
continue;
|
||||
}
|
||||
//if (ir010 >= ir011 || ir110 >= ir111) {
|
||||
// sched_yield();
|
||||
// continue;
|
||||
//}
|
||||
|
||||
// block-tiling attempt
|
||||
const int64_t blck_0 = 16;
|
||||
|
@ -11083,20 +11123,16 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
|
||||
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
|
||||
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
|
||||
const int64_t i13 = (ir1/(ne12*cne1)); // Note: currently, src1 is always a matrix
|
||||
const int64_t i12 = (ir1 - i13*ne12*cne1)/cne1;
|
||||
const int64_t _i11 = (ir1 - i13*ne12*cne1 - i12*cne1);
|
||||
const int64_t i11 = MMID_MATRIX_ROW(cur_a, _i11);
|
||||
const int64_t _i12 = ir1; // logical row index for this expert
|
||||
|
||||
// broadcast src0 into src1
|
||||
//const int64_t i03 = i13/r3;
|
||||
//const int64_t i02 = i12/r2;
|
||||
struct mmid_row_mapping row_mapping = MMID_MATRIX_ROW(cur_a, _i12);
|
||||
const int id = row_mapping.i1; // selected expert index
|
||||
|
||||
const int64_t i1 = i11;
|
||||
const int64_t i2 = i12;
|
||||
const int64_t i3 = i13;
|
||||
const int64_t i11 = id % ne11;
|
||||
const int64_t i12 = row_mapping.i2; // row index in src1
|
||||
|
||||
const char * src0_row = (const char *) src0->data + src0_offset;
|
||||
const int64_t i1 = id; // selected expert index
|
||||
const int64_t i2 = i12; // row
|
||||
|
||||
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
||||
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
||||
|
@ -11104,25 +11140,26 @@ static void ggml_compute_forward_mul_mat_id(
|
|||
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
||||
const char * src1_col = (const char *) wdata +
|
||||
(src1_cont || src1->type != vec_dot_type
|
||||
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
||||
: (i11*nb11 + i12*nb12 + i13*nb13));
|
||||
? (i11 + i12*ne11)*row_size
|
||||
: (i11*nb11 + i12*nb12));
|
||||
|
||||
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
||||
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2));
|
||||
|
||||
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
|
||||
//}
|
||||
|
||||
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||
vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_row + ir0*nb01, 0, src1_col, 0, 1);
|
||||
vec_dot(ne00, &tmp[ir0 - iir0], 0, src0_cur + ir0*nb01, 0, src1_col, 0, 1);
|
||||
}
|
||||
|
||||
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef MMID_MATRIX_ROW
|
||||
#undef MMID_MATRIX_ROW
|
||||
}
|
||||
|
||||
// ggml_compute_forward_out_prod
|
||||
|
@ -18442,7 +18479,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|||
const int n_as = src0->ne[2];
|
||||
cur += GGML_PAD(cur, sizeof(int64_t)); // align
|
||||
cur += n_as * sizeof(int64_t); // matrix_row_counts
|
||||
cur += n_as * src1->ne[1] * sizeof(int64_t); // matrix_rows
|
||||
cur += n_as * src1->ne[2] * sizeof(int64_t); // matrix_rows
|
||||
} break;
|
||||
case GGML_OP_OUT_PROD:
|
||||
{
|
||||
|
@ -20530,8 +20567,34 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
|
|||
return ok;
|
||||
}
|
||||
|
||||
static void gguf_free_kv(struct gguf_kv * kv) {
|
||||
if (kv->key.data) {
|
||||
GGML_FREE(kv->key.data);
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_STRING) {
|
||||
if (kv->value.str.data) {
|
||||
GGML_FREE(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||
if (kv->value.arr.data) {
|
||||
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
GGML_FREE(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_FREE(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct gguf_context * gguf_init_empty(void) {
|
||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||
struct gguf_context * ctx = GGML_CALLOC(1, sizeof(struct gguf_context));
|
||||
|
||||
memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
|
||||
ctx->header.version = GGUF_VERSION;
|
||||
|
@ -20576,7 +20639,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
bool ok = true;
|
||||
|
||||
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
|
||||
struct gguf_context * ctx = GGML_CALLOC(1, sizeof(struct gguf_context));
|
||||
|
||||
// read the header
|
||||
{
|
||||
|
@ -20613,9 +20676,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
// read the kv pairs
|
||||
{
|
||||
ctx->kv = GGML_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
|
||||
const uint64_t n_kv = ctx->header.n_kv;
|
||||
|
||||
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||
// header.n_kv will hold the actual value of pairs that were successfully read in the loop below
|
||||
ctx->header.n_kv = 0;
|
||||
ctx->kv = GGML_CALLOC(n_kv, sizeof(struct gguf_kv));
|
||||
|
||||
for (uint64_t i = 0; i < n_kv; ++i) {
|
||||
struct gguf_kv * kv = &ctx->kv[i];
|
||||
|
||||
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
||||
|
@ -20664,7 +20731,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
return NULL;
|
||||
}
|
||||
|
||||
kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * gguf_type_size(kv->value.arr.type));
|
||||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
|
||||
|
||||
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
|
||||
} break;
|
||||
|
@ -20678,7 +20745,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
return NULL;
|
||||
}
|
||||
|
||||
kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * sizeof(struct gguf_str));
|
||||
kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, sizeof(struct gguf_str));
|
||||
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
||||
|
@ -20694,6 +20761,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
if (!ok) {
|
||||
break;
|
||||
}
|
||||
|
||||
ctx->header.n_kv++;
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
|
@ -20706,7 +20775,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
// read the tensor infos
|
||||
{
|
||||
ctx->infos = GGML_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
||||
ctx->infos = GGML_CALLOC(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
|
||||
|
||||
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
||||
struct gguf_tensor_info * info = &ctx->infos[i];
|
||||
|
@ -20727,8 +20796,17 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
|
||||
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
|
||||
|
||||
// TODO: return an error instead of crashing with GGML_ASSERT
|
||||
gguf_tensor_info_sanitize(info);
|
||||
|
||||
// make sure there is no duplicated tensor names
|
||||
for (uint64_t j = 0; j < i; ++j) {
|
||||
if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
|
||||
fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
||||
fclose(file);
|
||||
|
@ -20842,12 +20920,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|||
|
||||
ok = ok && cur != NULL;
|
||||
|
||||
ggml_set_name(cur, ctx->infos[i].name.data);
|
||||
|
||||
if (!ok) {
|
||||
break;
|
||||
}
|
||||
|
||||
ggml_set_name(cur, ctx->infos[i].name.data);
|
||||
|
||||
// point the data member to the appropriate location in the binary blob using the tensor infos
|
||||
if (!params.no_alloc) {
|
||||
//cur->data = (char *) data->data + ctx->infos[i].offset - ctx->offset; // offset from start of file
|
||||
|
@ -20879,31 +20957,7 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
if (ctx->kv) {
|
||||
// free string memory - not great..
|
||||
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||
struct gguf_kv * kv = &ctx->kv[i];
|
||||
|
||||
if (kv->key.data) {
|
||||
GGML_FREE(kv->key.data);
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_STRING) {
|
||||
if (kv->value.str.data) {
|
||||
GGML_FREE(kv->value.str.data);
|
||||
}
|
||||
}
|
||||
|
||||
if (kv->type == GGUF_TYPE_ARRAY) {
|
||||
if (kv->value.arr.data) {
|
||||
if (kv->value.arr.type == GGUF_TYPE_STRING) {
|
||||
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
||||
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
|
||||
if (str->data) {
|
||||
GGML_FREE(str->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_FREE(kv->value.arr.data);
|
||||
}
|
||||
}
|
||||
gguf_free_kv(&ctx->kv[i]);
|
||||
}
|
||||
|
||||
GGML_FREE(ctx->kv);
|
||||
|
@ -20921,7 +20975,7 @@ void gguf_free(struct gguf_context * ctx) {
|
|||
GGML_FREE(ctx->infos);
|
||||
}
|
||||
|
||||
GGML_ALIGNED_FREE(ctx);
|
||||
GGML_FREE(ctx);
|
||||
}
|
||||
|
||||
const char * gguf_type_name(enum gguf_type type) {
|
||||
|
@ -21128,6 +21182,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
|
|||
return n_kv;
|
||||
}
|
||||
|
||||
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
|
||||
const int idx = gguf_find_key(ctx, key);
|
||||
if (idx >= 0) {
|
||||
const int n_kv = gguf_get_n_kv(ctx);
|
||||
gguf_free_kv(&ctx->kv[idx]);
|
||||
for (int i = idx; i < n_kv-1; ++i) {
|
||||
ctx->kv[i] = ctx->kv[i+1];
|
||||
}
|
||||
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
|
||||
ctx->header.n_kv--;
|
||||
}
|
||||
}
|
||||
|
||||
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
|
||||
const int idx = gguf_get_or_add_key(ctx, key);
|
||||
|
||||
|
@ -21219,7 +21286,7 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
|
|||
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
||||
ctx->kv[idx].value.arr.type = type;
|
||||
ctx->kv[idx].value.arr.n = n;
|
||||
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
|
||||
ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type));
|
||||
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
|
||||
}
|
||||
|
||||
|
@ -21229,7 +21296,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
|
|||
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
|
||||
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
|
||||
ctx->kv[idx].value.arr.n = n;
|
||||
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
|
||||
ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str));
|
||||
for (int i = 0; i < n; i++) {
|
||||
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
|
||||
str->n = strlen(data[i]);
|
||||
|
@ -21256,7 +21323,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|||
case GGUF_TYPE_ARRAY:
|
||||
{
|
||||
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
|
||||
const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
|
||||
const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
|
||||
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
|
||||
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
|
||||
}
|
||||
|
@ -21276,6 +21343,10 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
|
|||
void gguf_add_tensor(
|
||||
struct gguf_context * ctx,
|
||||
const struct ggml_tensor * tensor) {
|
||||
if (gguf_find_tensor(ctx, tensor->name) != -1) {
|
||||
GGML_ASSERT(false && "duplicated tensor name");
|
||||
}
|
||||
|
||||
const int idx = ctx->header.n_tensors;
|
||||
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
|
||||
|
||||
|
@ -21344,7 +21415,7 @@ struct gguf_buf {
|
|||
|
||||
static struct gguf_buf gguf_buf_init(size_t size) {
|
||||
struct gguf_buf buf = {
|
||||
/*buf.data =*/ size == 0 ? NULL : GGML_MALLOC(size),
|
||||
/*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
|
||||
/*buf.size =*/ size,
|
||||
/*buf.offset =*/ 0,
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue