feat: add new GGUFValueType.OBJ virtual type

The content of the OBJ type is actually a list of all key names of the object.

* Python
  * `gguf_writer.py`:
    * Added `def add_kv(self, key: str, val: Any) -> None`: Automatically determines the appropriate value type based on `val`.
    * Added `def add_dict(self, key: str, val: dict, excludes: Sequence[str] = []) -> None`: Adds an object (dict) value and recursively adds all of its subkeys.
    * Added `add_array_ex` to support nested and mixed-type arrays.
  * `constants.py`:
    * Added `GGUFValueType.get_type_ex(val)`: Adds support for numpy integers and floating-point numbers, and selects the bit width of a Python integer according to its magnitude.
  * `gguf_reader.py`:
    * Added functionality to retrieve values from specific fields using `ReaderField.get()` method.
  * Unit test added
* CPP
  * `ggml`:
    * Added `GGUF_TYPE_OBJ` to the `gguf_type` enum type.
    * Use `gguf_get_arr_n` and `gguf_get_arr_str` to get the subKey names of `GGUF_TYPE_OBJ`.
    * Added `gguf_set_obj_str` function to set object subkey names
    * Added `gguf_set_arr_obj` function to set object array count
    * Added `gguf_set_arr_arr` function to set nested array count
  * `llama`:
    * Modified `gguf_kv_to_str`
    * Added `LLAMA_API char * gguf_kv_to_c_str` function to get the c_str value as JSON format.
      * Maybe this API should be moved into `ggml` as `gguf_get_val_json`. (The problem is that ggml.c is written in C, while this function relies heavily on C++ features.)
    * Added basic support for `GGUF_TYPE_OBJ` and nested arrays
  * Unit test added

feat: add basic support to GGUF_TYPE_OBJ on cpp
feat(gguf.py): add OBJ and mixed-type array support to GGUF ARRAY
feat: add OBJ and mixed-type array support to GGUF ARRAY (CPP)
feat: add nested array support
feat: * Subkey name convention in OBJ types:
  * If the first letter of the subkey name is "/", it means referencing the full name of other keys.
  * If there is a ":" colon delimiter, it means that the string after the colon represents the subkey name in this object, otherwise the referencing subkey name is used.
feat: add LLAMA_API gguf_kv_to_c_str to llama.h
test: write test gguf file to tests folder directly(py)
test: add test-gguf-meta.cpp
feat: Key convention: "." indicates that the key is a subkey, not an independent key.
feat: add excludes argument to add_dict (gguf_writer.py)
feat: add add_array_ex to support nested and mixed-type arrays, and keep add_array unchanged
fix(constants.py): roll back the get_type function and add the new get_type_ex
test: add test compatibility
fix: use GGML_MALLOC instead of malloc
This commit is contained in:
Riceball LEE 2024-01-26 16:32:12 +08:00
parent 906cff55c2
commit 95a492a8c5
No known key found for this signature in database
GPG key ID: 10F15E84852CB868
14 changed files with 576 additions and 89 deletions

View file

@ -854,3 +854,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

View file

@ -6,6 +6,7 @@
#include "ggml.h" #include "ggml.h"
#include "ggml-alloc.h" #include "ggml-alloc.h"
#include "ggml-backend.h" #include "ggml-backend.h"
#include "llama.h"
#ifdef GGML_USE_CUBLAS #ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h" #include "ggml-cuda.h"
@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) {
return ggml_type_name(static_cast<ggml_type>(ftype)); return ggml_type_name(static_cast<ggml_type>(ftype));
} }
// Render element `i` of a typed GGUF data buffer as text.
//
// `data` is reinterpreted as an array of the C type matching `type`. Numeric
// types go through std::to_string, booleans become "true"/"false", and any
// other type yields an "unknown type <id>" message.
static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
    std::string text;
    switch (type) {
        case GGUF_TYPE_UINT8:   text = std::to_string(static_cast<const uint8_t  *>(data)[i]); break;
        case GGUF_TYPE_INT8:    text = std::to_string(static_cast<const int8_t   *>(data)[i]); break;
        case GGUF_TYPE_UINT16:  text = std::to_string(static_cast<const uint16_t *>(data)[i]); break;
        case GGUF_TYPE_INT16:   text = std::to_string(static_cast<const int16_t  *>(data)[i]); break;
        case GGUF_TYPE_UINT32:  text = std::to_string(static_cast<const uint32_t *>(data)[i]); break;
        case GGUF_TYPE_INT32:   text = std::to_string(static_cast<const int32_t  *>(data)[i]); break;
        case GGUF_TYPE_UINT64:  text = std::to_string(static_cast<const uint64_t *>(data)[i]); break;
        case GGUF_TYPE_INT64:   text = std::to_string(static_cast<const int64_t  *>(data)[i]); break;
        case GGUF_TYPE_FLOAT32: text = std::to_string(static_cast<const float    *>(data)[i]); break;
        case GGUF_TYPE_FLOAT64: text = std::to_string(static_cast<const double   *>(data)[i]); break;
        case GGUF_TYPE_BOOL:    text = static_cast<const bool *>(data)[i] ? "true" : "false";  break;
        default:                text = format("unknown type %d", type);                        break;
    }
    return text;
}
static void replace_all(std::string & s, const std::string & search, const std::string & replace) { static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
std::string result; std::string result;
for (size_t pos = 0; ; pos += search.length()) { for (size_t pos = 0; ; pos += search.length()) {
@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std::
s = std::move(result); s = std::move(result);
} }
// Produce a printable representation of the value stored under KV index `i`.
//
// Strings are returned verbatim. Arrays are rendered as "[a, b, ...]": string
// elements are double-quoted with backslashes and quotes escaped, nested-array
// elements are shown as "???", and all other elements go through
// gguf_data_to_str. Every remaining value type is rendered via gguf_data_to_str.
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);

    if (type == GGUF_TYPE_STRING) {
        return gguf_get_val_str(ctx_gguf, i);
    }
    if (type != GGUF_TYPE_ARRAY) {
        return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
    }

    const enum gguf_type elem_type = gguf_get_arr_type(ctx_gguf, i);
    const int n_elems = gguf_get_arr_n(ctx_gguf, i);
    const void * raw = gguf_get_arr_data(ctx_gguf, i);

    std::stringstream out;
    out << "[";
    for (int j = 0; j < n_elems; ++j) {
        // the separator goes between elements, never after the last one
        if (j > 0) {
            out << ", ";
        }
        if (elem_type == GGUF_TYPE_STRING) {
            std::string item = gguf_get_arr_str(ctx_gguf, i, j);
            // escape backslashes first so the quote escapes are not doubled
            replace_all(item, "\\", "\\\\");
            replace_all(item, "\"", "\\\"");
            out << '"' << item << '"';
        } else if (elem_type == GGUF_TYPE_ARRAY) {
            out << "???";
        } else {
            out << gguf_data_to_str(elem_type, raw, j);
        }
    }
    out << "]";
    return out.str();
}
static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") { static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
size_t tensor_size = ggml_nbytes(tensor); size_t tensor_size = ggml_nbytes(tensor);
printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n", printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
const char * name = gguf_get_key(ctx, i); const char * name = gguf_get_key(ctx, i);
const enum gguf_type type = gguf_get_kv_type(ctx, i); const enum gguf_type type = gguf_get_kv_type(ctx, i);
const std::string type_name = const std::string type_name =
type == GGUF_TYPE_ARRAY type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i)) ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
: gguf_type_name(type); : gguf_type_name(type);
std::string value = gguf_kv_to_str(ctx, i); char * v = gguf_kv_to_c_str(ctx, i, name);
std::string value = v;
const size_t MAX_VALUE_LEN = 40; const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) { if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());

70
ggml.c
View file

@ -19284,8 +19284,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_INT64] = sizeof(int64_t), [GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double), [GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined [GGUF_TYPE_ARRAY] = 0, // undefined
[GGUF_TYPE_OBJ] = 0, // undefined
}; };
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = { static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8", [GGUF_TYPE_UINT8] = "u8",
@ -19301,8 +19302,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT64] = "u64", [GGUF_TYPE_UINT64] = "u64",
[GGUF_TYPE_INT64] = "i64", [GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64", [GGUF_TYPE_FLOAT64] = "f64",
[GGUF_TYPE_OBJ] = "obj",
}; };
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13"); static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
union gguf_value { union gguf_value {
uint8_t uint8; uint8_t uint8;
@ -19525,6 +19527,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: case GGUF_TYPE_ARRAY:
{ {
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
@ -19571,7 +19574,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
} }
} break; } break;
case GGUF_TYPE_ARRAY: case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break; default: GGML_ASSERT(false && "invalid type"); break;
} }
} break; } break;
@ -19778,7 +19782,7 @@ void gguf_free(struct gguf_context * ctx) {
} }
} }
if (kv->type == GGUF_TYPE_ARRAY) { if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) {
if (kv->value.arr.data) { if (kv->value.arr.data) {
if (kv->value.arr.type == GGUF_TYPE_STRING) { if (kv->value.arr.type == GGUF_TYPE_STRING) {
for (uint64_t j = 0; j < kv->value.arr.n; ++j) { for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
@ -19863,7 +19867,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) { enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
return ctx->kv[key_id].value.arr.type; return ctx->kv[key_id].value.arr.type;
} }
@ -19875,7 +19879,7 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
struct gguf_kv * kv = &ctx->kv[key_id]; struct gguf_kv * kv = &ctx->kv[key_id];
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
return str->data; return str->data;
@ -19883,7 +19887,7 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) { int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
return ctx->kv[key_id].value.arr.n; return ctx->kv[key_id].value.arr.n;
} }
@ -19962,6 +19966,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) { const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY); GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING); GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
return &ctx->kv[key_id].value; return &ctx->kv[key_id].value;
} }
@ -20106,6 +20111,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
ctx->kv[idx].type = GGUF_TYPE_ARRAY; ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = type; ctx->kv[idx].value.arr.type = type;
ctx->kv[idx].value.arr.n = n; ctx->kv[idx].value.arr.n = n;
if (data == NULL) {
ctx->kv[idx].value.arr.data = NULL;
return;
}
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type)); ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type)); memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
} }
@ -20124,6 +20133,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
} }
} }
void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}
void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}
void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
for (int i = 0; i < n; i++) {
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
str->n = strlen(data[i]);
str->data = strdup(data[i]);
}
}
// set or add KV pairs from another context // set or add KV pairs from another context
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
for (uint32_t i = 0; i < src->header.n_kv; i++) { for (uint32_t i = 0; i < src->header.n_kv; i++) {
@ -20140,6 +20181,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_OBJ:
{
const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
}
gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
free((void *)data);
} break;
case GGUF_TYPE_ARRAY: case GGUF_TYPE_ARRAY:
{ {
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
@ -20149,8 +20199,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
} }
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data); GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
GGML_ASSERT(false && "nested arrays not supported");
} else { } else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
} }
@ -20304,6 +20352,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: case GGUF_TYPE_ARRAY:
{ {
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
@ -20330,7 +20379,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
} }
} break; } break;
case GGUF_TYPE_ARRAY: case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break; default: GGML_ASSERT(false && "invalid type"); break;
} }
} break; } break;

4
ggml.h
View file

@ -2141,6 +2141,7 @@ extern "C" {
GGUF_TYPE_UINT64 = 10, GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11, GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12, GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_OBJ = 13,
GGUF_TYPE_COUNT, // marks the end of the enum GGUF_TYPE_COUNT, // marks the end of the enum
}; };
@ -2212,6 +2213,9 @@ extern "C" {
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val); GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n); GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n); GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
// set or add KV pairs from another context // set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src); GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import sys import sys
from enum import Enum, IntEnum, auto from enum import Enum, IntEnum, auto
from typing import Any from typing import Any
import numpy as np
# #
# constants # constants
@ -550,6 +551,64 @@ class GGUFValueType(IntEnum):
UINT64 = 10 UINT64 = 10
INT64 = 11 INT64 = 11
FLOAT64 = 12 FLOAT64 = 12
OBJ = 13
@staticmethod
def get_type_ex(val: Any) -> GGUFValueType:
if isinstance(val, (str, bytes, bytearray)):
return GGUFValueType.STRING
elif isinstance(val, list):
return GGUFValueType.ARRAY
elif isinstance(val, np.float32):
return GGUFValueType.FLOAT32
elif isinstance(val, np.float64):
return GGUFValueType.FLOAT64
elif isinstance(val, float):
return GGUFValueType.FLOAT32
elif isinstance(val, bool):
return GGUFValueType.BOOL
elif isinstance(val, np.uint8):
return GGUFValueType.UINT8
elif isinstance(val, np.uint16):
return GGUFValueType.UINT16
elif isinstance(val, np.uint32):
return GGUFValueType.UINT32
elif isinstance(val, np.uint64):
return GGUFValueType.UINT64
elif isinstance(val, np.int8):
return GGUFValueType.INT8
elif isinstance(val, np.int16):
return GGUFValueType.INT16
elif isinstance(val, np.int32):
return GGUFValueType.INT32
elif isinstance(val, np.int64):
return GGUFValueType.INT64
elif isinstance(val, int):
if val >=0 and val <= np.iinfo(np.uint8).max:
return GGUFValueType.UINT8
elif val >=0 and val <= np.iinfo(np.uint16).max:
return GGUFValueType.UINT16
elif val >=0 and val <= np.iinfo(np.uint32).max:
return GGUFValueType.UINT32
elif val >=0 and val <= np.iinfo(np.uint64).max:
return GGUFValueType.UINT64
elif val >=np.iinfo(np.int8).min and val <= np.iinfo(np.int8).max:
return GGUFValueType.INT8
elif val >=np.iinfo(np.int16).min and val <= np.iinfo(np.int16).max:
return GGUFValueType.INT16
elif val >=np.iinfo(np.int32).min and val <= np.iinfo(np.int32).max:
return GGUFValueType.INT32
elif val >=np.iinfo(np.int64).min and val <= np.iinfo(np.int64).max:
return GGUFValueType.INT64
else:
print("The integer exceed limit:", val)
sys.exit()
elif isinstance(val, dict):
return GGUFValueType.OBJ
# TODO: need help with 64-bit types in Python
else:
print("Unknown type:", type(val))
sys.exit()
@staticmethod @staticmethod
def get_type(val: Any) -> GGUFValueType: def get_type(val: Any) -> GGUFValueType:

View file

@ -49,6 +49,34 @@ class ReaderField(NamedTuple):
types: list[GGUFValueType] = [] types: list[GGUFValueType] = []
def get(self):
    """Decode this field's value into a plain Python object.

    * STRING field: the last part decoded as UTF-8.
    * ARRAY/OBJ field with string elements: a list of decoded strings.
    * ARRAY/OBJ field whose element type is OBJ or ARRAY: the last part
      as-is (the reader stores the element count there).
    * ARRAY/OBJ field with any other element type: a flat list of values.
    * Any other field: the first element of the last part.
    """
    kind = self.types[0]

    if kind != GGUFValueType.ARRAY and kind != GGUFValueType.OBJ:
        if kind == GGUFValueType.STRING:
            return str(bytes(self.parts[-1]), encoding="utf-8")
        return self.parts[-1].tolist()[0]

    elem_kind = self.types[-1]
    if elem_kind == GGUFValueType.STRING:
        return [str(bytes(self.parts[i]), encoding="utf-8") for i in self.data]
    if elem_kind == GGUFValueType.OBJ or elem_kind == GGUFValueType.ARRAY:
        # elements live under their own keys; only the count is kept here
        return self.parts[-1]

    values = []
    for i in self.data:
        values.extend(self.parts[i].tolist())
    return values
def getType(self):
    """Return the field's value type.

    For ARRAY fields the result is a ``(type, element_type)`` tuple; for
    every other field it is the type alone.
    """
    kind = self.types[0]
    if kind != GGUFValueType.ARRAY:
        return kind
    return kind, self.types[-1]
class ReaderTensor(NamedTuple): class ReaderTensor(NamedTuple):
name: str name: str
@ -165,13 +193,16 @@ class GGUFReader:
val = self._get(offs, nptype) val = self._get(offs, nptype)
return int(val.nbytes), [val], [0], types return int(val.nbytes), [val], [0], types
# Handle arrays. # Handle arrays.
if gtype == GGUFValueType.ARRAY: if gtype == GGUFValueType.ARRAY or gtype == GGUFValueType.OBJ:
raw_itype = self._get(offs, np.uint32) raw_itype = self._get(offs, np.uint32)
offs += int(raw_itype.nbytes) offs += int(raw_itype.nbytes)
alen = self._get(offs, np.uint64) alen = self._get(offs, np.uint64)
offs += int(alen.nbytes) offs += int(alen.nbytes)
aparts: list[npt.NDArray[Any]] = [raw_itype, alen] aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = [] data_idxs: list[int] = []
if raw_itype[0] == GGUFValueType.OBJ or raw_itype[0] == GGUFValueType.ARRAY:
types += [raw_itype[0]]
return offs - orig_offs, aparts, data_idxs, types
for idx in range(alen[0]): for idx in range(alen[0]):
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0]) curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
if idx == 0: if idx == 0:
@ -212,11 +243,12 @@ class GGUFReader:
offs += int(raw_kv_type.nbytes) offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type] parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts) idxs_offs = len(parts)
field_name = str(bytes(kv_kdata), encoding = 'utf-8')
field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0]) field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
parts += field_parts parts += field_parts
self._push_field(ReaderField( self._push_field(ReaderField(
orig_offs, orig_offs,
str(bytes(kv_kdata), encoding = 'utf-8'), field_name,
parts, parts,
[idx + idxs_offs for idx in field_idxs], [idx + idxs_offs for idx in field_idxs],
field_types, field_types,

View file

@ -158,6 +158,75 @@ class GGUFWriter:
self.add_key(key) self.add_key(key)
self.add_val(val, GGUFValueType.ARRAY) self.add_val(val, GGUFValueType.ARRAY)
def add_array_ex(self, key: str, val: Sequence[Any]) -> None:
    """Add an array that may be nested or hold mixed element types.

    When every element has the same scalar type the array is written
    through ``add_val``. Otherwise (elements are dicts, lists, or of
    differing types) only the element type and length are written under
    ``key``, and each element is stored under its own sub-key
    ``.<key>[<index>]`` via ``add_kv``.

    Raises:
        ValueError: if ``val`` is not a sequence or is empty. An empty
            sequence has no first element to derive the element type from.
    """
    if not isinstance(val, Sequence):
        raise ValueError("Value must be a sequence for array type")
    # Validate before writing anything, so a rejected call cannot leave a
    # dangling key in the KV buffer.
    if len(val) == 0:
        raise ValueError("Value must be a non-empty sequence for array type")
    self.add_key(key)
    ltype = GGUFValueType.get_type_ex(val[0])
    # NOTE(review): get_type_ex sizes plain ints by magnitude, so e.g.
    # [1, 300] counts as mixed-type here; confirm this is intended.
    if not all(GGUFValueType.get_type_ex(i) is ltype for i in val[1:]):
        ltype = GGUFValueType.OBJ
    if ltype == GGUFValueType.OBJ or ltype == GGUFValueType.ARRAY:
        self.kv_data += self._pack("I", GGUFValueType.ARRAY)
        self.kv_data_count += 1
        self.kv_data += self._pack("I", ltype)
        self.kv_data += self._pack("Q", len(val))
        # sub-keys always carry a leading "." to mark them as non-independent
        item_prefix = key if key.startswith(".") else "." + key
        for i, item in enumerate(val):
            self.add_kv(item_prefix + "[" + str(i) + "]", item)
    else:
        self.add_val(val, GGUFValueType.ARRAY)
def add_kv(self, key: str, val: Any) -> None:
    """Add a key/value pair, picking the writer method from ``val``'s type.

    The type is determined by ``GGUFValueType.get_type_ex``; dicts go to
    ``add_dict`` and lists to ``add_array_ex``, scalars to the matching
    ``add_<type>`` method.

    Raises:
        ValueError: if the detected type has no writer method.
    """
    writer_names = {
        GGUFValueType.OBJ: "add_dict",
        GGUFValueType.ARRAY: "add_array_ex",
        GGUFValueType.STRING: "add_string",
        GGUFValueType.BOOL: "add_bool",
        GGUFValueType.INT8: "add_int8",
        GGUFValueType.INT16: "add_int16",
        GGUFValueType.INT32: "add_int32",
        GGUFValueType.INT64: "add_int64",
        GGUFValueType.UINT8: "add_uint8",
        GGUFValueType.UINT16: "add_uint16",
        GGUFValueType.UINT32: "add_uint32",
        GGUFValueType.UINT64: "add_uint64",
        GGUFValueType.FLOAT32: "add_float32",
        GGUFValueType.FLOAT64: "add_float64",
    }
    detected = GGUFValueType.get_type_ex(val)
    writer_name = writer_names.get(detected)
    if writer_name is None:
        raise ValueError(f"Unsupported type: {type(val)}")
    # looked up lazily so only the method actually needed is resolved
    getattr(self, writer_name)(key, val)
def add_dict(self, key: str, val: dict, excludes: Sequence[str] = ()) -> None:
    """Add a dict as an OBJ value and recursively add its members.

    The OBJ value stored under ``key`` is the list of member names. Each
    member is then written under the sub-key ``.<key>.<name>`` via
    ``add_kv``. A member name starting with "/" references an existing
    key: its name is listed but no sub-key is written.

    Args:
        key: name of the object.
        val: the dict to store.
        excludes: member names to leave out. They are omitted from both
            the stored name list and the sub-keys.

    Raises:
        ValueError: if ``val`` is not a dict, or no members remain after
            applying ``excludes`` (``add_val`` rejects an empty OBJ).
    """
    if not isinstance(val, dict):
        raise ValueError("Value must be a dict type")
    # Filter first so the stored name list never mentions a member that has
    # no corresponding sub-key.
    members = {k: v for k, v in val.items() if k not in excludes}
    # Reject before writing the key, so a failed call leaves no dangling key.
    if not members:
        raise ValueError("Value must be a non-empty dict type")
    self.add_key(key)
    self.add_val(members, GGUFValueType.OBJ)
    for k, v in members.items():
        # "/" means referencing an existing key
        if k.startswith("/"):
            continue
        real_key = key + "." + k
        if not real_key.startswith("."):
            real_key = "." + real_key
        self.add_kv(real_key, v)
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True) -> None: def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True) -> None:
if vtype is None: if vtype is None:
vtype = GGUFValueType.get_type(val) vtype = GGUFValueType.get_type(val)
@ -181,6 +250,8 @@ class GGUFWriter:
self.kv_data += self._pack("Q", len(val)) self.kv_data += self._pack("Q", len(val))
for item in val: for item in val:
self.add_val(item, add_vtype=False) self.add_val(item, add_vtype=False)
elif vtype == GGUFValueType.OBJ and isinstance(val, dict) and val:
self.add_val(list(val.keys()), GGUFValueType.ARRAY, False)
else: else:
raise ValueError("Invalid GGUF metadata value type or value") raise ValueError("Invalid GGUF metadata value type or value")

View file

@ -0,0 +1,34 @@
import sys
from pathlib import Path
import numpy as np
import unittest
# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf.constants import GGUFValueType # noqa: E402
class TestGGUFValueType(unittest.TestCase):
    """Checks the value-type detection of ``GGUFValueType.get_type_ex``."""

    def test_get_type_ex(self):
        # get_type_ex is the function that handles numpy scalars and dicts,
        # so the numpy/dict expectations below are checked against it.
        self.assertEqual(GGUFValueType.get_type_ex("test"), GGUFValueType.STRING)
        self.assertEqual(GGUFValueType.get_type_ex([1, 2, 3]), GGUFValueType.ARRAY)
        self.assertEqual(GGUFValueType.get_type_ex(1.0), GGUFValueType.FLOAT32)
        self.assertEqual(GGUFValueType.get_type_ex(True), GGUFValueType.BOOL)
        self.assertEqual(GGUFValueType.get_type_ex(b"test"), GGUFValueType.STRING)
        self.assertEqual(GGUFValueType.get_type_ex(np.uint8(1)), GGUFValueType.UINT8)
        self.assertEqual(GGUFValueType.get_type_ex(np.uint16(1)), GGUFValueType.UINT16)
        self.assertEqual(GGUFValueType.get_type_ex(np.uint32(1)), GGUFValueType.UINT32)
        self.assertEqual(GGUFValueType.get_type_ex(np.uint64(1)), GGUFValueType.UINT64)
        self.assertEqual(GGUFValueType.get_type_ex(np.int8(-1)), GGUFValueType.INT8)
        self.assertEqual(GGUFValueType.get_type_ex(np.int16(-1)), GGUFValueType.INT16)
        self.assertEqual(GGUFValueType.get_type_ex(np.int32(-1)), GGUFValueType.INT32)
        self.assertEqual(GGUFValueType.get_type_ex(np.int64(-1)), GGUFValueType.INT64)
        self.assertEqual(GGUFValueType.get_type_ex(np.float32(1.0)), GGUFValueType.FLOAT32)
        self.assertEqual(GGUFValueType.get_type_ex(np.float64(1.0)), GGUFValueType.FLOAT64)
        self.assertEqual(GGUFValueType.get_type_ex({"k": 12}), GGUFValueType.OBJ)
if __name__ == '__main__':
unittest.main()

View file

@ -1,7 +1,133 @@
import gguf # noqa: F401 import os
import sys
from pathlib import Path
import numpy as np
import unittest
# TODO: add tests # Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFWriter, GGUFReader, GGUFValueType # noqa: E402
model_file = os.path.join(Path(__file__).parent.parent.parent, "models", "test_writer.gguf")
def test_write_gguf() -> None: class TestGGUFReaderWriter(unittest.TestCase):
pass
@classmethod
def setUpClass(cls):
gguf_writer = GGUFWriter(model_file, "llama")
# gguf_writer.add_architecture()
gguf_writer.add_block_count(12)
gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer
gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float
gguf_writer.add_kv("uint8", 1)
gguf_writer.add_kv("nint8", np.int8(1))
gguf_writer.add_dict("dict1", {"key1": 2, "key2": "hi", "obj": {"k": 1}})
gguf_writer.add_array_ex("oArray", [{"k": 4, "o": {"o1": 6}}, {"k": 9}])
gguf_writer.add_array_ex("cArray", [3, "hi", [1, 2]])
gguf_writer.add_array_ex("arrayInArray", [[2, 3, 4], [5, 7, 8]])
gguf_writer.add_kv("tokenizer.ggml.bos_token_id", "bos")
gguf_writer.add_kv("tokenizer.ggml.add_bos_token", True)
gguf_writer.add_dict("tokenizer_config", {
"/tokenizer.ggml.bos_token_id:bos_token": None, "/tokenizer.ggml.add_bos_token": None})
gguf_writer.add_array("oldArray", [1, 2, 3])
gguf_writer.add_custom_alignment(64)
tensor1 = np.ones((32,), dtype=np.float32) * 100.0
tensor2 = np.ones((64,), dtype=np.float32) * 101.0
tensor3 = np.ones((96,), dtype=np.float32) * 102.0
gguf_writer.add_tensor("tensor1", tensor1)
gguf_writer.add_tensor("tensor2", tensor2)
gguf_writer.add_tensor("tensor3", tensor3)
gguf_writer.write_header_to_file()
gguf_writer.write_kv_data_to_file()
gguf_writer.write_tensors_to_file()
gguf_writer.close()
def test_rw(self) -> None:
    """Compatibility check: the array written with ``add_array`` reads back as an INT32 array."""
    # test compatibility
    reader = GGUFReader(model_file)
    self.assertEqual(reader.alignment, 64)

    field = reader.get_field("oldArray")
    self.assertIsNotNone(field)
    kind, item_kind = field.getType()
    self.assertEqual(kind, GGUFValueType.ARRAY)
    self.assertEqual(item_kind, GGUFValueType.INT32)
    self.assertListEqual(field.get(), [1, 2, 3])
def test_rw_ex(self) -> None:
    """Read back the extended fixture entries: typed scalars, dicts, and object/mixed/nested arrays."""
    reader = GGUFReader(model_file)
    self.assertEqual(reader.alignment, 64)

    def check_array_header(name, expected_count, expected_item_type):
        # For these arrays the field's get() yields the element count;
        # the elements themselves are read through ".name[index]" subkeys below.
        field = reader.get_field(name)
        self.assertIsNotNone(field)
        self.assertEqual(field.get(), expected_count)
        kind, item_kind = field.getType()
        self.assertEqual(kind, GGUFValueType.ARRAY)
        self.assertEqual(item_kind, expected_item_type)

    # Scalars written through add_kv.
    for key, expected_type in (("uint8", GGUFValueType.UINT8), ("nint8", GGUFValueType.INT8)):
        field = reader.get_field(key)
        self.assertEqual(field.get(), 1)
        self.assertEqual(field.types[0], expected_type)

    # An object field yields its subkey names; the values live under ".<parent>.<subkey>".
    dict_field = reader.get_field("dict1")
    self.assertIsNotNone(dict_field)
    self.assertListEqual(dict_field.get(), ['key1', 'key2', 'obj'])
    for key, expected in ((".dict1.key1", 2), (".dict1.key2", "hi")):
        self.assertEqual(reader.get_field(key).get(), expected)
    self.assertListEqual(reader.get_field(".dict1.obj").get(), ['k'])
    self.assertEqual(reader.get_field(".dict1.obj.k").get(), 1)

    # Array of objects.
    check_array_header("oArray", 2, GGUFValueType.OBJ)
    first_k = reader.get_field(".oArray[0].k")
    self.assertIsNotNone(first_k)
    self.assertEqual(first_k.get(), 4)
    self.assertEqual(reader.get_field(".oArray[1].k").get(), 9)

    # Mixed-type array: reported with OBJ as its item type.
    check_array_header("cArray", 3, GGUFValueType.OBJ)
    for key, expected in ((".cArray[0]", 3), (".cArray[1]", "hi")):
        self.assertEqual(reader.get_field(key).get(), expected)
    self.assertListEqual(reader.get_field(".cArray[2]").get(), [1, 2])

    # Array of arrays.
    check_array_header("arrayInArray", 2, GGUFValueType.ARRAY)
    for key, expected in ((".arrayInArray[0]", [2, 3, 4]), (".arrayInArray[1]", [5, 7, 8])):
        self.assertListEqual(reader.get_field(key).get(), expected)

    # Plain top-level keys, then the object whose subkey names reference them.
    for key, expected in (("tokenizer.ggml.bos_token_id", "bos"), ("tokenizer.ggml.add_bos_token", True)):
        self.assertEqual(reader.get_field(key).get(), expected)
    config_field = reader.get_field("tokenizer_config")
    self.assertIsNotNone(config_field)
    self.assertListEqual(
        config_field.get(),
        ["/tokenizer.ggml.bos_token_id:bos_token", "/tokenizer.ggml.add_bos_token"])
if __name__ == '__main__':
unittest.main()

105
llama.cpp
View file

@ -780,28 +780,87 @@ static std::string gguf_data_to_str(enum gguf_type type, const void * data, int
} }
} }
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { template< class CharT >
static std::string quoted_str(const CharT* s, const std::string delim="\"", const std::string escape="\\") {
std::string val = s;
replace_all(val, escape, escape + escape);
replace_all(val, delim, escape + delim);
val = delim + val + delim;
return val;
}
static void gguf_kv_to_stream(const struct gguf_context * ctx_gguf, int i, std::ostringstream &ss, const std::string parent_name = "") {
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
switch (type) { switch (type) {
case GGUF_TYPE_STRING: case GGUF_TYPE_STRING:
return gguf_get_val_str(ctx_gguf, i); {
ss << quoted_str(gguf_get_val_str(ctx_gguf, i));
} break;
case GGUF_TYPE_OBJ:
{
ss << "{";
int arr_n = gguf_get_arr_n(ctx_gguf, i);
for (int j = 0; j < arr_n; j++) {
std::string subkey_name = gguf_get_arr_str(ctx_gguf, i, j);
std::string key;
if (!subkey_name.empty() && subkey_name.at(0) == '/') {
std::size_t ix = subkey_name.find(':');
if (ix != std::string::npos) {
key = subkey_name;
subkey_name = subkey_name.substr(ix+1);
key = key.substr(1, ix-1);
} else {
subkey_name = subkey_name.substr(1);
key = subkey_name;
ix = subkey_name.rfind('.');
if (ix != std::string::npos) {
subkey_name = subkey_name.substr(ix+1);
}
}
} else {
if (parent_name.empty()) {
key = subkey_name;
} else {
key = parent_name;
key.append(".");
key.append(subkey_name);
}
if (key.at(0) != '.') {key = "." + key;}
}
ss << quoted_str(subkey_name.c_str()) << ":";
int k_id = gguf_find_key(ctx_gguf, key.c_str());
if (k_id != -1) {
gguf_kv_to_stream(ctx_gguf, k_id, ss, key);
} else {
ss << "undefined";
}
if (j < arr_n - 1) {
ss << ", ";
}
}
ss << "}";
} break;
case GGUF_TYPE_ARRAY: case GGUF_TYPE_ARRAY:
{ {
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
int arr_n = gguf_get_arr_n(ctx_gguf, i); int arr_n = gguf_get_arr_n(ctx_gguf, i);
const void * data = gguf_get_arr_data(ctx_gguf, i); const void * data = gguf_get_arr_data(ctx_gguf, i);
std::stringstream ss;
ss << "["; ss << "[";
for (int j = 0; j < arr_n; j++) { for (int j = 0; j < arr_n; j++) {
if (arr_type == GGUF_TYPE_STRING) { if (arr_type == GGUF_TYPE_STRING) {
std::string val = gguf_get_arr_str(ctx_gguf, i, j); std::string val = quoted_str(gguf_get_arr_str(ctx_gguf, i, j));
// escape quotes
replace_all(val, "\\", "\\\\");
replace_all(val, "\"", "\\\"");
ss << '"' << val << '"'; ss << '"' << val << '"';
} else if (arr_type == GGUF_TYPE_ARRAY) { } else if (arr_type == GGUF_TYPE_OBJ || arr_type == GGUF_TYPE_ARRAY) {
ss << "???"; std::string s = "[" + std::to_string(j) + "]";
std::string key = parent_name.empty() ? s : parent_name + s;
if (key.at(0) != '.') {key = "." + key;}
int k_id = gguf_find_key(ctx_gguf, key.c_str());
if (k_id != -1) {
gguf_kv_to_stream(ctx_gguf, k_id, ss, key);
} else {
ss << "undefined";
}
} else { } else {
ss << gguf_data_to_str(arr_type, data, j); ss << gguf_data_to_str(arr_type, data, j);
} }
@ -810,13 +869,23 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
} }
} }
ss << "]"; ss << "]";
return ss.str(); } break;
}
default: default:
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); ss << gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
} }
} }
// Renders key/value pair `i` of `ctx_gguf` as a string by delegating to gguf_kv_to_stream.
// `parent_name` is the name passed on for resolving nested subkeys; when it is null,
// the key's own name (gguf_get_key) is used instead.
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) {
    const char * prefix = parent_name;
    if (prefix == NULL) {
        prefix = gguf_get_key(ctx_gguf, i);
    }
    std::ostringstream out;
    gguf_kv_to_stream(ctx_gguf, i, out, prefix);
    return out.str();
}
// C-string variant of gguf_kv_to_str: returns a strdup()'d copy of the rendered value.
// The caller owns the returned buffer and releases it with free(); the pointer is
// whatever strdup() returns, so it may be null if the allocation fails.
char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) {
    const std::string rendered = gguf_kv_to_str(ctx_gguf, i, parent_name);
    char * copy = strdup(rendered.c_str());
    return copy;
}
// //
// ggml helpers // ggml helpers
// //
@ -2184,7 +2253,7 @@ namespace GGUFMeta {
static T get_kv(const gguf_context * ctx, const int k) { static T get_kv(const gguf_context * ctx, const int k) {
const enum gguf_type kt = gguf_get_kv_type(ctx, k); const enum gguf_type kt = gguf_get_kv_type(ctx, k);
if (kt != GKV::gt) { if (kt != GKV::gt && kt != GGUF_TYPE_OBJ && GKV::gt != GGUF_TYPE_ARRAY) {
throw std::runtime_error(format("key %s has wrong type %s but expected type %s", throw std::runtime_error(format("key %s has wrong type %s but expected type %s",
gguf_get_key(ctx, k), gguf_type_name(kt), gguf_type_name(GKV::gt))); gguf_get_key(ctx, k), gguf_type_name(kt), gguf_type_name(GKV::gt)));
} }
@ -2411,13 +2480,16 @@ struct llama_model_loader {
LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__); LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
for (int i = 0; i < n_kv; i++) { for (int i = 0; i < n_kv; i++) {
const char * name = gguf_get_key(ctx_gguf, i); const char * name = gguf_get_key(ctx_gguf, i);
// skip the subkeys.
if (name[0] == '.') { continue; }
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
const std::string type_name = const std::string type_name =
type == GGUF_TYPE_ARRAY type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i)) ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
: gguf_type_name(type); : gguf_type_name(type);
std::string value = gguf_kv_to_str(ctx_gguf, i); std::string value = gguf_kv_to_str(ctx_gguf, i, name);
const size_t MAX_VALUE_LEN = 40; const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) { if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
@ -2426,6 +2498,7 @@ struct llama_model_loader {
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
} }
LLAMA_LOG_INFO("%s: Dumping metadata keys/values Done.\n", __func__);
// print type counts // print type counts
for (auto & kv : n_type) { for (auto & kv : n_type) {
@ -2796,7 +2869,7 @@ static void llm_load_hparams(
continue; continue;
} }
const char * name = gguf_get_key(ctx, i); const char * name = gguf_get_key(ctx, i);
const std::string value = gguf_kv_to_str(ctx, i); const std::string value = gguf_kv_to_str(ctx, i, name);
model.gguf_kv.emplace(name, value); model.gguf_kv.emplace(name, value);
} }

View file

@ -898,6 +898,8 @@ extern "C" {
LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx); LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
LLAMA_API char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

BIN
models/test_writer.gguf Normal file

Binary file not shown.

View file

@ -50,6 +50,9 @@ llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cp
llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf) llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG # llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
llama_build_executable(test-gguf-meta.cpp)
llama_test_executable (test-gguf-meta test-gguf-meta.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/test_writer.gguf)
llama_build_and_test_executable(test-grammar-parser.cpp) llama_build_and_test_executable(test-grammar-parser.cpp)
llama_build_and_test_executable(test-llama-grammar.cpp) llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp) llama_build_and_test_executable(test-grad0.cpp)

83
tests/test-gguf-meta.cpp Normal file
View file

@ -0,0 +1,83 @@
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include "ggml.h"
#include "llama.h"
#include "get-model.h"
// run `python3 gguf-py/tests/test_gguf.py` to generate test_writer.gguf file.
int main(int argc, char ** argv)
{
char* fname = get_model_or_exit(argc, argv);
struct gguf_context * ctx_gguf = NULL;
struct ggml_context * ctx_meta = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &ctx_meta,
};
ctx_gguf = gguf_init_from_file(fname, params);
if (!ctx_gguf) {
fprintf(stderr, "%s: failed to load model from %s\n", __func__, fname);
return 1;
}
int n_kv = gguf_get_n_kv(ctx_gguf);
for (int i = 0; i < n_kv; i++) {
const char * name = gguf_get_key(ctx_gguf, i);
// skip the subkeys.
if (name[0] == '.') { continue; }
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
char * value = gguf_kv_to_c_str(ctx_gguf, i, name);
printf("key: %s, type: %s, value: %s\n", name, gguf_type_name(type), value);
free(value);
}
int k_id = gguf_find_key(ctx_gguf, "no_such_key");
assert(k_id == -1);
k_id = gguf_find_key(ctx_gguf, "tokenizer_config");
assert(k_id != -1);
const char * name = gguf_get_key(ctx_gguf, k_id);
assert(strcmp(name, "tokenizer_config") == 0);
enum gguf_type type = gguf_get_kv_type(ctx_gguf, k_id);
assert(type == GGUF_TYPE_OBJ);
char * value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "{\"bos_token\":\"bos\", \"add_bos_token\":true}") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "dict1");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "{\"key1\":2, \"key2\":\"hi\", \"obj\":{\"k\":1}}") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "oArray");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[{\"k\":4, \"o\":{\"o1\":6}}, {\"k\":9}]") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "cArray");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[3, \"hi\", [1, 2]]") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "arrayInArray");
assert(k_id != -1);
type = gguf_get_kv_type(ctx_gguf, k_id);
assert(type == GGUF_TYPE_ARRAY);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[[2, 3, 4], [5, 7, 8]]") == 0);
free(value);
printf("Done!\n");
}