gguf : fixes + simplify example + add ggml_nbytes_pad()

This commit is contained in:
Georgi Gerganov 2023-08-15 16:01:38 +03:00
parent 4463965401
commit c9b2f7f1bf
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 17 additions and 93 deletions

View file

@ -21,91 +21,6 @@ static std::string to_string(const T & val) {
return ss.str(); return ss.str();
} }
// Writes `val` to `fout` as a length-prefixed string: first the length as a raw
// int32_t, then exactly that many bytes of character data (no terminating NUL).
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
    const int32_t len = static_cast<int32_t>(val.size());

    fout.write(reinterpret_cast<const char *>(&len), sizeof(len));
    fout.write(val.data(), len);
}
// Writes the object representation of a 32-bit signed integer to `fout`
// (sizeof(int32_t) raw bytes, no conversion applied).
void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
    const char * raw = reinterpret_cast<const char *>(&val);
    fout.write(raw, sizeof(int32_t));
}
// Writes `val` to `fout` as an unsigned 64-bit integer (always 8 raw bytes).
//
// The parameter stays `size_t` so existing callers are unaffected, but the value
// is widened to uint64_t before writing: emitting sizeof(size_t) bytes directly
// would produce a 4-byte field on targets where size_t is 32 bits wide, which
// does not match the "u64" field this helper is named for.
void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
    const uint64_t val64 = static_cast<uint64_t>(val);
    fout.write(reinterpret_cast<const char *>(&val64), sizeof(val64));
}
// Writes one key/value record to `fout` and logs it to stdout.
//
// Record layout, in order: the key (via gguf_ex_write_str), the raw bytes of the
// `type` tag, then the raw bytes of `val` (sizeof(T) bytes).
template<typename T>
void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
    gguf_ex_write_str(fout, key);

    const char * type_bytes = reinterpret_cast<const char *>(&type);
    const char * val_bytes  = reinterpret_cast<const char *>(&val);
    fout.write(type_bytes, sizeof(type));
    fout.write(val_bytes,  sizeof(val));

    const std::string printable = to_string(val);
    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), printable.c_str());
}
// String specialization: the value is not written as raw object bytes but as a
// length-prefixed string (int32 length followed by the characters), which is the
// same encoding gguf_ex_write_str produces, so that helper is reused here.
template<>
void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
    // key, then the type tag
    gguf_ex_write_str(fout, key);
    fout.write(reinterpret_cast<const char *>(&type), sizeof(type));

    // payload: int32 length + bytes
    gguf_ex_write_str(fout, val);

    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
}
// Writes one key/array record to `fout` and logs its contents to stdout.
//
// Record layout, in order: the key (via gguf_ex_write_str), the GGUF_TYPE_ARRAY
// tag, the element `type` tag, the element count as int32, then the elements as
// one contiguous run of raw bytes (count * sizeof(T)).
template<typename T>
void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
    gguf_ex_write_str(fout, key);

    const enum gguf_type array_tag = GGUF_TYPE_ARRAY;
    fout.write(reinterpret_cast<const char *>(&array_tag), sizeof(array_tag));

    const int32_t count = static_cast<int32_t>(val.size());

    fout.write(reinterpret_cast<const char *>(&type),  sizeof(type));
    fout.write(reinterpret_cast<const char *>(&count), sizeof(count));
    fout.write(reinterpret_cast<const char *>(val.data()), count * sizeof(T));

    // log as "[a, b, c]": the separator goes in front of every element but the first
    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
    for (int idx = 0; idx < count; ++idx) {
        if (idx > 0) {
            fprintf(stdout, ", ");
        }
        fprintf(stdout, "%s", to_string(val[idx]).c_str());
    }
    fprintf(stdout, "]\n");
}
// String-array specialization: the header (key, GGUF_TYPE_ARRAY tag, element
// type tag, int32 count) matches the generic version, but each element is
// written as a length-prefixed string instead of raw object bytes.
template<>
void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
    gguf_ex_write_str(fout, key);

    const enum gguf_type array_tag = GGUF_TYPE_ARRAY;
    fout.write(reinterpret_cast<const char *>(&array_tag), sizeof(array_tag));

    const int32_t count = static_cast<int32_t>(val.size());

    fout.write(reinterpret_cast<const char *>(&type),  sizeof(type));
    fout.write(reinterpret_cast<const char *>(&count), sizeof(count));

    // each element: int32 length + bytes, i.e. exactly what gguf_ex_write_str emits
    for (int idx = 0; idx < count; ++idx) {
        gguf_ex_write_str(fout, val[idx]);
    }

    // log as "[a, b, c]": the separator goes in front of every element but the first
    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
    for (int idx = 0; idx < count; ++idx) {
        if (idx > 0) {
            fprintf(stdout, ", ");
        }
        fprintf(stdout, "%s", val[idx].c_str());
    }
    fprintf(stdout, "]\n");
}
bool gguf_ex_write(const std::string & fname) { bool gguf_ex_write(const std::string & fname) {
struct gguf_context * ctx = gguf_init_empty(); struct gguf_context * ctx = gguf_init_empty();
@ -120,9 +35,9 @@ bool gguf_ex_write(const std::string & fname) {
gguf_set_val_bool(ctx, "some.parameter.bool", true); gguf_set_val_bool(ctx, "some.parameter.bool", true);
gguf_set_val_str (ctx, "some.parameter.string", "hello world"); gguf_set_val_str (ctx, "some.parameter.string", "hello world");
//gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4); gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
//gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3); gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
//gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" }); gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
} }
struct ggml_init_params params = { struct ggml_init_params params = {

16
ggml.c
View file

@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
error_desc = "insufficient memory"; error_desc = "insufficient memory";
break; break;
} }
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
__func__, error_desc, size/(1024.0*1024.0));
return NULL; return NULL;
} }
return aligned_memory; return aligned_memory;
} }
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size) #define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
// //
// is enough, but just in case, adding the second part // is enough, but just in case, adding the second part
return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN); return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
}
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
} }
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@ -19271,6 +19275,10 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso
ctx->infos[idx].name.n = strlen(tensor->name) + 1; ctx->infos[idx].name.n = strlen(tensor->name) + 1;
ctx->infos[idx].name.data = strdup(tensor->name); ctx->infos[idx].name.data = strdup(tensor->name);
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
ctx->infos[idx].ne[i] = 1;
}
ctx->infos[idx].n_dims = tensor->n_dims; ctx->infos[idx].n_dims = tensor->n_dims;
for (int i = 0; i < tensor->n_dims; i++) { for (int i = 0; i < tensor->n_dims; i++) {
ctx->infos[idx].ne[i] = tensor->ne[i]; ctx->infos[idx].ne[i] = tensor->ne[i];
@ -19305,8 +19313,8 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
// write header // write header
gguf_fwrite_el(file, &ctx->header.magic, sizeof(ctx->header.magic)); gguf_fwrite_el(file, &ctx->header.magic, sizeof(ctx->header.magic));
gguf_fwrite_el(file, &ctx->header.version, sizeof(ctx->header.version)); gguf_fwrite_el(file, &ctx->header.version, sizeof(ctx->header.version));
gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
// write key-value pairs // write key-value pairs
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {

1
ggml.h
View file

@ -566,6 +566,7 @@ extern "C" {
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor); GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split); GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
GGML_API int ggml_blck_size (enum ggml_type type); GGML_API int ggml_blck_size (enum ggml_type type);