feat: add new GGUFValueType.OBJ virtual type

The content of the OBJ type is actually a list of all key names of the object.

* Python
  * `gguf_writer.py`:
    * Added `def add_kv(self, key: str, val: Any) -> None`: Automatically determines the appropriate value type based on `val`.
    * Added `def add_dict(self, key: str, val: dict, excludes: Sequence[str] = []) -> None`: Adds object (dict) values; it recursively adds all subkeys.
    * Added `add_array_ex` to support nested and mixed-type arrays.
  * `constants.py`:
    * Added `GGUFValueType.get_type_ex(val)`: Adds support for numpy integer and floating-point types, and selects the narrowest integer width that fits a plain Python `int`.
  * `gguf_reader.py`:
    * Added functionality to retrieve values from specific fields using `ReaderField.get()` method.
  * Unit test added
* CPP
  * `ggml`:
    * Added `GGUF_TYPE_OBJ` to the `gguf_type` enum type.
    * Use `gguf_get_arr_n` and `gguf_get_arr_str` to get the subKey names of `GGUF_TYPE_OBJ`.
    * Added `gguf_set_obj_str` function to set object subkey names
    * Added `gguf_set_arr_obj` function to set object array count
    * Added `gguf_set_arr_arr` function to set nested array count
  * `llama`:
    * Modified `gguf_kv_to_str`
    * Added `LLAMA_API char * gguf_kv_to_c_str` function to get the c_str value as JSON format.
      * Maybe this API should be moved into `ggml` as `gguf_get_val_json`. (The problem is that ggml.c is written in C, whereas this code relies heavily on C++ features.)
    * Added basic support for `GGUF_TYPE_OBJ` and nested arrays
  * Unit test added

feat: add basic support to GGUF_TYPE_OBJ on cpp
feat(gguf.py): add OBJ and mixed-type array support to GGUF ARRAY
feat: add OBJ and mixed-type array support to GGUF ARRAY(CPP)
feat: add nested array support
feat: * Subkey name convention in OBJ types:
  * If the first letter of the subkey name is "/", it means referencing the full name of other keys.
  * If there is a ":" colon delimiter, it means that the string after the colon represents the subkey name in this object, otherwise the referencing subkey name is used.
feat: add LLAMA_API gguf_kv_to_c_str to llama.h
test: write test gguf file to tests folder directly(py)
test: add test-gguf-meta.cpp
feat: Key convention: "." indicates that the key is a subkey, not an independent key.
feat: add excludes argument to add_dict(gguf_writer.py)
feat: add add_array_ex to support nested and mixed-type arrays, and keep add_array unchanged
fix(constants.py): rollback the get_type function and add the new get_type_ex
test: add test compatibility
fix: use GGML_MALLOC instead of malloc
This commit is contained in:
Riceball LEE 2024-01-26 16:32:12 +08:00
parent 906cff55c2
commit 95a492a8c5
No known key found for this signature in database
GPG key ID: 10F15E84852CB868
14 changed files with 576 additions and 89 deletions

View file

@ -854,3 +854,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

View file

@ -6,6 +6,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "llama.h"
#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) {
return ggml_type_name(static_cast<ggml_type>(ftype));
}
// Formats element `i` of the buffer `data`, interpreted as an array of `type`,
// as text. Numeric types go through std::to_string, booleans become
// "true"/"false", and any other type yields an "unknown type <n>" message.
static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
    switch (type) {
        case GGUF_TYPE_UINT8:   return std::to_string(static_cast<const uint8_t  *>(data)[i]);
        case GGUF_TYPE_INT8:    return std::to_string(static_cast<const int8_t   *>(data)[i]);
        case GGUF_TYPE_UINT16:  return std::to_string(static_cast<const uint16_t *>(data)[i]);
        case GGUF_TYPE_INT16:   return std::to_string(static_cast<const int16_t  *>(data)[i]);
        case GGUF_TYPE_UINT32:  return std::to_string(static_cast<const uint32_t *>(data)[i]);
        case GGUF_TYPE_INT32:   return std::to_string(static_cast<const int32_t  *>(data)[i]);
        case GGUF_TYPE_UINT64:  return std::to_string(static_cast<const uint64_t *>(data)[i]);
        case GGUF_TYPE_INT64:   return std::to_string(static_cast<const int64_t  *>(data)[i]);
        case GGUF_TYPE_FLOAT32: return std::to_string(static_cast<const float    *>(data)[i]);
        case GGUF_TYPE_FLOAT64: return std::to_string(static_cast<const double   *>(data)[i]);
        case GGUF_TYPE_BOOL:    return static_cast<const bool *>(data)[i] ? "true" : "false";
        default:                break;
    }
    // strings, arrays and anything else cannot be read from a flat buffer here
    return format("unknown type %d", type);
}
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
std::string result;
for (size_t pos = 0; ; pos += search.length()) {
@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std::
s = std::move(result);
}
// Renders the value of key-value pair `i` as a display string.
//  - strings are returned verbatim
//  - arrays are rendered as "[a, b, ...]"; string elements are quoted with
//    backslashes and double quotes escaped, nested arrays show as "???"
//  - every other type is formatted by gguf_data_to_str
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
    const enum gguf_type kv_type = gguf_get_kv_type(ctx_gguf, i);

    if (kv_type == GGUF_TYPE_STRING) {
        return gguf_get_val_str(ctx_gguf, i);
    }
    if (kv_type != GGUF_TYPE_ARRAY) {
        return gguf_data_to_str(kv_type, gguf_get_val_data(ctx_gguf, i), 0);
    }

    const enum gguf_type elem_type = gguf_get_arr_type(ctx_gguf, i);
    const int n_elems = gguf_get_arr_n(ctx_gguf, i);
    const void * elems = gguf_get_arr_data(ctx_gguf, i);

    std::stringstream out;
    out << "[";
    for (int j = 0; j < n_elems; ++j) {
        // separator goes between elements, never after the last one
        if (j > 0) {
            out << ", ";
        }
        if (elem_type == GGUF_TYPE_STRING) {
            std::string text = gguf_get_arr_str(ctx_gguf, i, j);
            // escape backslashes first so the quote escapes are not doubled
            replace_all(text, "\\", "\\\\");
            replace_all(text, "\"", "\\\"");
            out << '"' << text << '"';
        } else if (elem_type == GGUF_TYPE_ARRAY) {
            out << "???";
        } else {
            out << gguf_data_to_str(elem_type, elems, j);
        }
    }
    out << "]";
    return out.str();
}
static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
size_t tensor_size = ggml_nbytes(tensor);
printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
const char * name = gguf_get_key(ctx, i);
const enum gguf_type type = gguf_get_kv_type(ctx, i);
const std::string type_name =
type == GGUF_TYPE_ARRAY
type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
: gguf_type_name(type);
std::string value = gguf_kv_to_str(ctx, i);
char * v = gguf_kv_to_c_str(ctx, i, name);
std::string value = v;
const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());

70
ggml.c
View file

@ -19284,8 +19284,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
[GGUF_TYPE_OBJ] = 0, // undefined
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
@ -19301,8 +19302,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT64] = "u64",
[GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64",
[GGUF_TYPE_OBJ] = "obj",
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");
union gguf_value {
uint8_t uint8;
@ -19525,6 +19527,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
@ -19571,7 +19574,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;
@ -19778,7 +19782,7 @@ void gguf_free(struct gguf_context * ctx) {
}
}
if (kv->type == GGUF_TYPE_ARRAY) {
if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) {
if (kv->value.arr.data) {
if (kv->value.arr.type == GGUF_TYPE_STRING) {
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
@ -19863,7 +19867,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
// Returns the element type of the array-shaped value stored at `key_id`.
// Both GGUF_TYPE_ARRAY and GGUF_TYPE_OBJ keys are accepted: an OBJ value is
// stored with the same layout, its elements being the object's sub-key names.
// Aborts (GGML_ASSERT) when `key_id` is out of range or the key has another type.
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // one combined check only: an additional ARRAY-only assert would abort for OBJ keys
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.type;
}
@ -19875,7 +19879,7 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
struct gguf_kv * kv = &ctx->kv[key_id];
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
return str->data;
@ -19883,7 +19887,7 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
// Returns the number of elements of the array-shaped value stored at `key_id`.
// For a GGUF_TYPE_OBJ key this is the number of sub-key names of the object.
// Aborts (GGML_ASSERT) when `key_id` is out of range or the key is neither an
// ARRAY nor an OBJ.
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // one combined check only: an additional ARRAY-only assert would abort for OBJ keys
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.n;
}
@ -19962,6 +19966,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
// Returns the address of the value storage of the key at `key_id`.
// Only usable for keys that are not ARRAY, OBJ or STRING; each of those types
// trips its own GGML_ASSERT below, as does an out-of-range `key_id`.
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
// reject the array-shaped types (ARRAY and OBJ) and strings
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
return &ctx->kv[key_id].value;
}
@ -20106,6 +20111,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = type;
ctx->kv[idx].value.arr.n = n;
if (data == NULL) {
ctx->kv[idx].value.arr.data = NULL;
return;
}
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
}
@ -20124,6 +20133,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
}
}
void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}
void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}
void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
for (int i = 0; i < n; i++) {
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
str->n = strlen(data[i]);
str->data = strdup(data[i]);
}
}
// set or add KV pairs from another context
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
for (uint32_t i = 0; i < src->header.n_kv; i++) {
@ -20140,6 +20181,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_OBJ:
    {
        // An OBJ value carries its sub-key names as an array of gguf_str.
        // Build a temporary array of C-string pointers for gguf_set_obj_str,
        // which duplicates every name itself. The scratch array goes through
        // GGML_MALLOC/GGML_FREE like the string-array case of this switch,
        // instead of an unchecked raw malloc.
        const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
        for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
            data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
        }
        gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
        GGML_FREE((void *)data);
    } break;
case GGUF_TYPE_ARRAY:
{
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
@ -20149,8 +20199,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
}
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
GGML_ASSERT(false && "nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
@ -20304,6 +20352,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
@ -20330,7 +20379,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;

4
ggml.h
View file

@ -2141,6 +2141,7 @@ extern "C" {
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_OBJ = 13,
GGUF_TYPE_COUNT, // marks the end of the enum
};
@ -2212,6 +2213,9 @@ extern "C" {
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import sys
from enum import Enum, IntEnum, auto
from typing import Any
import numpy as np
#
# constants
@ -550,6 +551,64 @@ class GGUFValueType(IntEnum):
UINT64 = 10
INT64 = 11
FLOAT64 = 12
OBJ = 13
@staticmethod
def get_type_ex(val: Any) -> GGUFValueType:
    """Map a Python or numpy value to the GGUF value type used to store it.

    Strings/bytes map to STRING, lists to ARRAY, dicts to OBJ, numpy scalars
    to the type of the same width, Python floats to FLOAT32 and bools to BOOL.
    A plain Python int gets the narrowest unsigned type that holds it when it
    is non-negative, otherwise the narrowest signed type.

    Prints a message and calls sys.exit() when the value is an int outside
    the 64-bit range or has an unsupported type.
    """
    # The checks run in this order. np.float64 is tested before float and
    # bool before int, because the later class would also match.
    ordered_checks = (
        ((str, bytes, bytearray), GGUFValueType.STRING),
        (list, GGUFValueType.ARRAY),
        (np.float32, GGUFValueType.FLOAT32),
        (np.float64, GGUFValueType.FLOAT64),
        (float, GGUFValueType.FLOAT32),
        (bool, GGUFValueType.BOOL),
        (np.uint8, GGUFValueType.UINT8),
        (np.uint16, GGUFValueType.UINT16),
        (np.uint32, GGUFValueType.UINT32),
        (np.uint64, GGUFValueType.UINT64),
        (np.int8, GGUFValueType.INT8),
        (np.int16, GGUFValueType.INT16),
        (np.int32, GGUFValueType.INT32),
        (np.int64, GGUFValueType.INT64),
    )
    for py_types, gguf_type in ordered_checks:
        if isinstance(val, py_types):
            return gguf_type

    if isinstance(val, int):
        unsigned_widths = (
            (np.uint8, GGUFValueType.UINT8),
            (np.uint16, GGUFValueType.UINT16),
            (np.uint32, GGUFValueType.UINT32),
            (np.uint64, GGUFValueType.UINT64),
        )
        signed_widths = (
            (np.int8, GGUFValueType.INT8),
            (np.int16, GGUFValueType.INT16),
            (np.int32, GGUFValueType.INT32),
            (np.int64, GGUFValueType.INT64),
        )
        if val >= 0:
            for dtype, gguf_type in unsigned_widths:
                if val <= np.iinfo(dtype).max:
                    return gguf_type
        for dtype, gguf_type in signed_widths:
            limits = np.iinfo(dtype)
            if limits.min <= val <= limits.max:
                return gguf_type
        print("The integer exceed limit:", val)
        sys.exit()

    if isinstance(val, dict):
        return GGUFValueType.OBJ

    print("Unknown type:", type(val))
    sys.exit()
@staticmethod
def get_type(val: Any) -> GGUFValueType:

View file

@ -49,6 +49,34 @@ class ReaderField(NamedTuple):
types: list[GGUFValueType] = []
def get(self):
    """Decode this field's payload into plain Python values.

    Returns:
        * ARRAY/OBJ of strings: a list of str (for OBJ, the sub-key names).
        * ARRAY of OBJ or of ARRAY: the element count as an int; the
          elements themselves are not part of this field.
        * any other ARRAY: a flat list of the element values.
        * STRING: the decoded str.
        * scalar: the single Python value.
    """
    vtype = self.types[0]  # avoid shadowing the builtin `type`
    if vtype == GGUFValueType.ARRAY or vtype == GGUFValueType.OBJ:
        itype = self.types[-1]
        if itype == GGUFValueType.STRING:
            return [str(bytes(self.parts[idx]), encoding="utf-8") for idx in self.data]
        if itype == GGUFValueType.OBJ or itype == GGUFValueType.ARRAY:
            # The last part is the one-element length array; hand back a
            # plain int rather than the numpy array backing it.
            return int(self.parts[-1][0])
        return [pv for idx in self.data for pv in self.parts[idx].tolist()]
    if vtype == GGUFValueType.STRING:
        return str(bytes(self.parts[-1]), encoding="utf-8")
    return self.parts[-1].tolist()[0]
def getType(self):
    """Return the field's value type.

    For an ARRAY field the result is a `(ARRAY, item_type)` tuple, where
    `item_type` is the last entry of `types`; for every other field it is
    the value type alone.
    """
    head = self.types[0]
    if head != GGUFValueType.ARRAY:
        return head
    return head, self.types[-1]
class ReaderTensor(NamedTuple):
name: str
@ -165,13 +193,16 @@ class GGUFReader:
val = self._get(offs, nptype)
return int(val.nbytes), [val], [0], types
# Handle arrays.
if gtype == GGUFValueType.ARRAY:
if gtype == GGUFValueType.ARRAY or gtype == GGUFValueType.OBJ:
raw_itype = self._get(offs, np.uint32)
offs += int(raw_itype.nbytes)
alen = self._get(offs, np.uint64)
offs += int(alen.nbytes)
aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
data_idxs: list[int] = []
if raw_itype[0] == GGUFValueType.OBJ or raw_itype[0] == GGUFValueType.ARRAY:
types += [raw_itype[0]]
return offs - orig_offs, aparts, data_idxs, types
for idx in range(alen[0]):
curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
if idx == 0:
@ -212,11 +243,12 @@ class GGUFReader:
offs += int(raw_kv_type.nbytes)
parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
idxs_offs = len(parts)
field_name = str(bytes(kv_kdata), encoding = 'utf-8')
field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
parts += field_parts
self._push_field(ReaderField(
orig_offs,
str(bytes(kv_kdata), encoding = 'utf-8'),
field_name,
parts,
[idx + idxs_offs for idx in field_idxs],
field_types,

View file

@ -158,6 +158,75 @@ class GGUFWriter:
self.add_key(key)
self.add_val(val, GGUFValueType.ARRAY)
def add_array_ex(self, key: str, val: Sequence[Any]) -> None:
    """Add an array that may be nested or hold items of different types.

    When every item has the same non-container type, the array is written
    through add_val() as a regular GGUF array. Otherwise (items are dicts,
    lists, or of differing types) only the item type and the item count are
    written under `key`, and each item is added as its own sub-key named
    ".<key>[<index>]" via add_kv().

    Raises:
        ValueError: if `val` is not a sequence or is empty.
    """
    if not isinstance(val, Sequence):
        raise ValueError("Value must be a sequence for array type")
    if len(val) == 0:
        # Checked before add_key() so that a failure cannot leave a key
        # behind without a value (assumes add_key emits immediately - confirm).
        raise ValueError("Value must be a non-empty sequence for array type")

    self.add_key(key)
    ltype = GGUFValueType.get_type_ex(val[0])
    if not all(GGUFValueType.get_type_ex(i) is ltype for i in val[1:]):
        # mixed item types are stored like an array of objects
        ltype = GGUFValueType.OBJ

    if ltype == GGUFValueType.OBJ or ltype == GGUFValueType.ARRAY:
        self.kv_data += self._pack("I", GGUFValueType.ARRAY)
        self.kv_data_count += 1
        self.kv_data += self._pack("I", ltype)
        self.kv_data += self._pack("Q", len(val))
        # a leading "." marks a sub-key; normalise the prefix once
        item_prefix = key if key.startswith(".") else "." + key
        for i, item in enumerate(val):
            self.add_kv(item_prefix + "[" + str(i) + "]", item)
    else:
        self.add_val(val, GGUFValueType.ARRAY)
def add_kv(self, key: str, val: Any) -> None:
    """Add `val` under `key`, picking the writer method from the value's type.

    The type is determined by GGUFValueType.get_type_ex(); dicts go to
    add_dict(), lists to add_array_ex(), everything else to the matching
    scalar/string adder.

    Raises:
        ValueError: if the detected type has no adder.
    """
    vtype = GGUFValueType.get_type_ex(val)
    # method names are resolved lazily so only the selected adder is looked up
    adder_names = {
        GGUFValueType.OBJ: "add_dict",
        GGUFValueType.ARRAY: "add_array_ex",
        GGUFValueType.STRING: "add_string",
        GGUFValueType.BOOL: "add_bool",
        GGUFValueType.INT8: "add_int8",
        GGUFValueType.INT16: "add_int16",
        GGUFValueType.INT32: "add_int32",
        GGUFValueType.INT64: "add_int64",
        GGUFValueType.UINT8: "add_uint8",
        GGUFValueType.UINT16: "add_uint16",
        GGUFValueType.UINT32: "add_uint32",
        GGUFValueType.UINT64: "add_uint64",
        GGUFValueType.FLOAT32: "add_float32",
        GGUFValueType.FLOAT64: "add_float64",
    }
    adder_name = adder_names.get(vtype)
    if adder_name is None:
        raise ValueError(f"Unsupported type: {type(val)}")
    getattr(self, adder_name)(key, val)
def add_dict(self, key: str, val: dict, excludes: Sequence[str] = ()) -> None:
    """Add a dict as an OBJ value and recursively add its entries as sub-keys.

    The OBJ value stored under `key` is the list of the dict's key names.
    Each entry is then added with add_kv() under ".<key>.<name>". Names that
    start with "/" reference an already existing key and are listed only.

    Args:
        key: name of the object key.
        val: the dict to store.
        excludes: names to leave out; they are neither listed in the object
            nor written as sub-keys.

    Raises:
        ValueError: if `val` is not a dict, or no key remains after applying
            `excludes`.
    """
    if not isinstance(val, dict):
        raise ValueError("Value must be a dict type")
    # Drop excluded names up front so the stored key list never names a
    # sub-key that is not written.
    kept = {k: v for k, v in val.items() if k not in excludes}
    if not kept:
        # add_val() rejects an empty object; fail before the key is added
        raise ValueError("Value must be a dict with at least one non-excluded key")

    self.add_key(key)
    self.add_val(kept, GGUFValueType.OBJ)
    for k, v in kept.items():
        # "/" means referencing an existing key
        if k.startswith("/"):
            continue
        real_key = key + "." + k
        # a leading "." marks the entry as a sub-key rather than an independent key
        if not real_key.startswith("."):
            real_key = "." + real_key
        self.add_kv(real_key, v)
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True) -> None:
if vtype is None:
vtype = GGUFValueType.get_type(val)
@ -181,6 +250,8 @@ class GGUFWriter:
self.kv_data += self._pack("Q", len(val))
for item in val:
self.add_val(item, add_vtype=False)
elif vtype == GGUFValueType.OBJ and isinstance(val, dict) and val:
self.add_val(list(val.keys()), GGUFValueType.ARRAY, False)
else:
raise ValueError("Invalid GGUF metadata value type or value")

View file

@ -0,0 +1,34 @@
import sys
from pathlib import Path
import numpy as np
import unittest
# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf.constants import GGUFValueType # noqa: E402
class TestGGUFValueType(unittest.TestCase):
    """Checks the value-to-type mapping of GGUFValueType.get_type_ex()."""

    def test_get_type_ex(self):
        # numpy scalars and dicts are handled by get_type_ex(), so that is the
        # function exercised here rather than get_type()
        cases = [
            ("test", GGUFValueType.STRING),
            ([1, 2, 3], GGUFValueType.ARRAY),
            (1.0, GGUFValueType.FLOAT32),
            (True, GGUFValueType.BOOL),
            (b"test", GGUFValueType.STRING),
            (np.uint8(1), GGUFValueType.UINT8),
            (np.uint16(1), GGUFValueType.UINT16),
            (np.uint32(1), GGUFValueType.UINT32),
            (np.uint64(1), GGUFValueType.UINT64),
            (np.int8(-1), GGUFValueType.INT8),
            (np.int16(-1), GGUFValueType.INT16),
            (np.int32(-1), GGUFValueType.INT32),
            (np.int64(-1), GGUFValueType.INT64),
            (np.float32(1.0), GGUFValueType.FLOAT32),
            (np.float64(1.0), GGUFValueType.FLOAT64),
            ({"k": 12}, GGUFValueType.OBJ),
        ]
        for val, expected in cases:
            with self.subTest(val=val):
                self.assertEqual(GGUFValueType.get_type_ex(val), expected)

    def test_get_type_ex_python_int_width(self):
        # plain ints get the narrowest type that can hold them
        cases = [
            (1, GGUFValueType.UINT8),
            (256, GGUFValueType.UINT16),
            (65536, GGUFValueType.UINT32),
            (2**32, GGUFValueType.UINT64),
            (-1, GGUFValueType.INT8),
            (-129, GGUFValueType.INT16),
            (-32769, GGUFValueType.INT32),
            (-2**31 - 1, GGUFValueType.INT64),
        ]
        for val, expected in cases:
            with self.subTest(val=val):
                self.assertEqual(GGUFValueType.get_type_ex(val), expected)
if __name__ == '__main__':
unittest.main()

View file

@ -1,7 +1,133 @@
import gguf # noqa: F401
import os
import sys
from pathlib import Path
import numpy as np
import unittest
# TODO: add tests
# Necessary to load the local gguf package
sys.path.insert(0, str(Path(__file__).parent.parent))
from gguf import GGUFWriter, GGUFReader, GGUFValueType # noqa: E402
model_file = os.path.join(Path(__file__).parent.parent.parent, "models", "test_writer.gguf")
# Placeholder test: it has an empty body and checks nothing.
def test_write_gguf() -> None:
pass
# Round-trip test: setUpClass writes a GGUF file to `model_file` with GGUFWriter,
# and the test methods read it back with GGUFReader and check the decoded values.
class TestGGUFReaderWriter(unittest.TestCase):
@classmethod
def setUpClass(cls):
# Write the fixture file once for all tests of this class.
gguf_writer = GGUFWriter(model_file, "llama")
# gguf_writer.add_architecture()
gguf_writer.add_block_count(12)
gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer
gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float
# add_kv chooses the stored type from the value itself
gguf_writer.add_kv("uint8", 1)
gguf_writer.add_kv("nint8", np.int8(1))
# nested dict: sub-keys are read back below as ".dict1.<name>"
gguf_writer.add_dict("dict1", {"key1": 2, "key2": "hi", "obj": {"k": 1}})
# array of objects, mixed-type array, and array of arrays
gguf_writer.add_array_ex("oArray", [{"k": 4, "o": {"o1": 6}}, {"k": 9}])
gguf_writer.add_array_ex("cArray", [3, "hi", [1, 2]])
gguf_writer.add_array_ex("arrayInArray", [[2, 3, 4], [5, 7, 8]])
gguf_writer.add_kv("tokenizer.ggml.bos_token_id", "bos")
gguf_writer.add_kv("tokenizer.ggml.add_bos_token", True)
# sub-key names starting with "/" reference the two keys added just above
gguf_writer.add_dict("tokenizer_config", {
"/tokenizer.ggml.bos_token_id:bos_token": None, "/tokenizer.ggml.add_bos_token": None})
# regular array written through the pre-existing add_array API
gguf_writer.add_array("oldArray", [1, 2, 3])
gguf_writer.add_custom_alignment(64)
tensor1 = np.ones((32,), dtype=np.float32) * 100.0
tensor2 = np.ones((64,), dtype=np.float32) * 101.0
tensor3 = np.ones((96,), dtype=np.float32) * 102.0
gguf_writer.add_tensor("tensor1", tensor1)
gguf_writer.add_tensor("tensor2", tensor2)
gguf_writer.add_tensor("tensor3", tensor3)
gguf_writer.write_header_to_file()
gguf_writer.write_kv_data_to_file()
gguf_writer.write_tensors_to_file()
gguf_writer.close()
# Arrays written with add_array must still read back as INT32 arrays.
def test_rw(self) -> None:
# test compatibility
gguf_reader = GGUFReader(model_file)
self.assertEqual(gguf_reader.alignment, 64)
v = gguf_reader.get_field("oldArray")
self.assertIsNotNone(v)
type, itype = v.getType()
self.assertEqual(type, GGUFValueType.ARRAY)
self.assertEqual(itype, GGUFValueType.INT32)
self.assertListEqual(v.get(), [1,2,3])
# Values written through add_kv / add_dict / add_array_ex read back correctly.
def test_rw_ex(self) -> None:
gguf_reader = GGUFReader(model_file)
self.assertEqual(gguf_reader.alignment, 64)
# scalars: plain int 1 is stored as UINT8, np.int8 keeps INT8
v = gguf_reader.get_field("uint8")
self.assertEqual(v.get(), 1)
self.assertEqual(v.types[0], GGUFValueType.UINT8)
v = gguf_reader.get_field("nint8")
self.assertEqual(v.get(), 1)
self.assertEqual(v.types[0], GGUFValueType.INT8)
# object: the field itself yields the sub-key names, values live under ".dict1.*"
v = gguf_reader.get_field("dict1")
self.assertIsNotNone(v)
self.assertListEqual(v.get(), ['key1', 'key2', 'obj'])
v = gguf_reader.get_field(".dict1.key1")
self.assertEqual(v.get(), 2)
v = gguf_reader.get_field(".dict1.key2")
self.assertEqual(v.get(), "hi")
v = gguf_reader.get_field(".dict1.obj")
self.assertListEqual(v.get(), ['k'])
v = gguf_reader.get_field(".dict1.obj.k")
self.assertEqual(v.get(), 1)
# array of objects: the field yields the element count, elements are ".oArray[i]"
v = gguf_reader.get_field("oArray")
self.assertIsNotNone(v)
count = v.get()
self.assertEqual(count, 2)
type, itype = v.getType()
self.assertEqual(type, GGUFValueType.ARRAY)
self.assertEqual(itype, GGUFValueType.OBJ)
v = gguf_reader.get_field(".oArray[0].k")
self.assertIsNotNone(v)
self.assertEqual(v.get(), 4)
v = gguf_reader.get_field(".oArray[1].k")
self.assertEqual(v.get(), 9)
# mixed-type array: reported with item type OBJ, elements are ".cArray[i]"
v = gguf_reader.get_field("cArray")
self.assertIsNotNone(v)
count = v.get()
self.assertEqual(count, 3)
type, itype = v.getType()
self.assertEqual(type, GGUFValueType.ARRAY)
self.assertEqual(itype, GGUFValueType.OBJ)
v = gguf_reader.get_field(".cArray[0]")
self.assertEqual(v.get(), 3)
v = gguf_reader.get_field(".cArray[1]")
self.assertEqual(v.get(), "hi")
v = gguf_reader.get_field(".cArray[2]")
self.assertListEqual(v.get(), [1, 2])
# nested array: item type ARRAY, each inner array is ".arrayInArray[i]"
v = gguf_reader.get_field("arrayInArray")
self.assertIsNotNone(v)
count = v.get()
self.assertEqual(count, 2)
type, itype = v.getType()
self.assertEqual(type, GGUFValueType.ARRAY)
self.assertEqual(itype, GGUFValueType.ARRAY)
v = gguf_reader.get_field(".arrayInArray[0]")
self.assertListEqual(v.get(), [2, 3, 4])
v = gguf_reader.get_field(".arrayInArray[1]")
self.assertListEqual(v.get(), [5, 7, 8])
# object made of references: its names are stored verbatim, including "/" and ":"
v = gguf_reader.get_field("tokenizer.ggml.bos_token_id")
self.assertEqual(v.get(), "bos")
v = gguf_reader.get_field("tokenizer.ggml.add_bos_token")
self.assertEqual(v.get(), True)
v = gguf_reader.get_field("tokenizer_config")
self.assertIsNotNone(v)
self.assertListEqual(v.get(), ["/tokenizer.ggml.bos_token_id:bos_token", "/tokenizer.ggml.add_bos_token"])
if __name__ == '__main__':
unittest.main()

105
llama.cpp
View file

@ -780,28 +780,87 @@ static std::string gguf_data_to_str(enum gguf_type type, const void * data, int
}
}
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
// Returns `s` wrapped in `delim`, with every occurrence of `escape` and of
// `delim` inside the text prefixed by `escape`.
// With the defaults, a"b\c becomes "a\"b\\c" (including the outer quotes).
// `delim` and `escape` are taken by const reference so they are not copied on
// every call. `s` must be a valid NUL-terminated string.
template< class CharT >
static std::string quoted_str(const CharT* s, const std::string & delim="\"", const std::string & escape="\\") {
    std::string val = s;
    // escape the escape character first, so the escapes added for the
    // delimiter in the next step are not escaped a second time
    replace_all(val, escape, escape + escape);
    replace_all(val, delim, escape + delim);
    return delim + val + delim;
}
static void gguf_kv_to_stream(const struct gguf_context * ctx_gguf, int i, std::ostringstream &ss, const std::string parent_name = "") {
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
switch (type) {
case GGUF_TYPE_STRING:
return gguf_get_val_str(ctx_gguf, i);
{
ss << quoted_str(gguf_get_val_str(ctx_gguf, i));
} break;
case GGUF_TYPE_OBJ:
{
ss << "{";
int arr_n = gguf_get_arr_n(ctx_gguf, i);
for (int j = 0; j < arr_n; j++) {
std::string subkey_name = gguf_get_arr_str(ctx_gguf, i, j);
std::string key;
if (!subkey_name.empty() && subkey_name.at(0) == '/') {
std::size_t ix = subkey_name.find(':');
if (ix != std::string::npos) {
key = subkey_name;
subkey_name = subkey_name.substr(ix+1);
key = key.substr(1, ix-1);
} else {
subkey_name = subkey_name.substr(1);
key = subkey_name;
ix = subkey_name.rfind('.');
if (ix != std::string::npos) {
subkey_name = subkey_name.substr(ix+1);
}
}
} else {
if (parent_name.empty()) {
key = subkey_name;
} else {
key = parent_name;
key.append(".");
key.append(subkey_name);
}
if (key.at(0) != '.') {key = "." + key;}
}
ss << quoted_str(subkey_name.c_str()) << ":";
int k_id = gguf_find_key(ctx_gguf, key.c_str());
if (k_id != -1) {
gguf_kv_to_stream(ctx_gguf, k_id, ss, key);
} else {
ss << "undefined";
}
if (j < arr_n - 1) {
ss << ", ";
}
}
ss << "}";
} break;
case GGUF_TYPE_ARRAY:
{
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
int arr_n = gguf_get_arr_n(ctx_gguf, i);
const void * data = gguf_get_arr_data(ctx_gguf, i);
std::stringstream ss;
ss << "[";
for (int j = 0; j < arr_n; j++) {
if (arr_type == GGUF_TYPE_STRING) {
std::string val = gguf_get_arr_str(ctx_gguf, i, j);
// escape quotes
replace_all(val, "\\", "\\\\");
replace_all(val, "\"", "\\\"");
std::string val = quoted_str(gguf_get_arr_str(ctx_gguf, i, j));
ss << '"' << val << '"';
} else if (arr_type == GGUF_TYPE_ARRAY) {
ss << "???";
} else if (arr_type == GGUF_TYPE_OBJ || arr_type == GGUF_TYPE_ARRAY) {
std::string s = "[" + std::to_string(j) + "]";
std::string key = parent_name.empty() ? s : parent_name + s;
if (key.at(0) != '.') {key = "." + key;}
int k_id = gguf_find_key(ctx_gguf, key.c_str());
if (k_id != -1) {
gguf_kv_to_stream(ctx_gguf, k_id, ss, key);
} else {
ss << "undefined";
}
} else {
ss << gguf_data_to_str(arr_type, data, j);
}
@ -810,11 +869,21 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
}
}
ss << "]";
} break;
default:
ss << gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
}
}
// Renders key-value pair `i` as a string by streaming it through
// gguf_kv_to_stream. `parent_name` is the name passed on for resolving
// sub-keys; when it is null the pair's own key name is used.
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) {
    const char * root_name = parent_name != NULL ? parent_name : gguf_get_key(ctx_gguf, i);

    std::ostringstream out;
    gguf_kv_to_stream(ctx_gguf, i, out, root_name);
    return out.str();
}
default:
return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
}
// C-string variant of gguf_kv_to_str: renders key-value pair `i` and returns
// the text in a buffer obtained from strdup. The caller owns the returned
// buffer and has to release it with free(); the result is whatever strdup
// returns, so it can be null if the duplication fails.
char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name = nullptr) {
    // the temporary std::string lives until strdup has copied its contents
    return strdup(gguf_kv_to_str(ctx_gguf, i, parent_name).c_str());
}
//
@ -2184,7 +2253,7 @@ namespace GGUFMeta {
static T get_kv(const gguf_context * ctx, const int k) {
const enum gguf_type kt = gguf_get_kv_type(ctx, k);
if (kt != GKV::gt) {
if (kt != GKV::gt && kt != GGUF_TYPE_OBJ && GKV::gt != GGUF_TYPE_ARRAY) {
throw std::runtime_error(format("key %s has wrong type %s but expected type %s",
gguf_get_key(ctx, k), gguf_type_name(kt), gguf_type_name(GKV::gt)));
}
@ -2411,13 +2480,16 @@ struct llama_model_loader {
LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
for (int i = 0; i < n_kv; i++) {
const char * name = gguf_get_key(ctx_gguf, i);
// skip the subkeys.
if (name[0] == '.') { continue; }
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
const std::string type_name =
type == GGUF_TYPE_ARRAY
type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i))
: gguf_type_name(type);
std::string value = gguf_kv_to_str(ctx_gguf, i);
std::string value = gguf_kv_to_str(ctx_gguf, i, name);
const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
@ -2426,6 +2498,7 @@ struct llama_model_loader {
LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str());
}
LLAMA_LOG_INFO("%s: Dumping metadata keys/values Done.\n", __func__);
// print type counts
for (auto & kv : n_type) {
@ -2796,7 +2869,7 @@ static void llm_load_hparams(
continue;
}
const char * name = gguf_get_key(ctx, i);
const std::string value = gguf_kv_to_str(ctx, i);
const std::string value = gguf_kv_to_str(ctx, i, name);
model.gguf_kv.emplace(name, value);
}

View file

@ -898,6 +898,8 @@ extern "C" {
LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
LLAMA_API char * gguf_kv_to_c_str(const struct gguf_context * ctx_gguf, int i, const char* parent_name);
#ifdef __cplusplus
}
#endif

BIN
models/test_writer.gguf Normal file

Binary file not shown.

View file

@ -50,6 +50,9 @@ llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cp
llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
llama_build_executable(test-gguf-meta.cpp)
llama_test_executable (test-gguf-meta test-gguf-meta.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/test_writer.gguf)
llama_build_and_test_executable(test-grammar-parser.cpp)
llama_build_and_test_executable(test-llama-grammar.cpp)
llama_build_and_test_executable(test-grad0.cpp)

83
tests/test-gguf-meta.cpp Normal file
View file

@ -0,0 +1,83 @@
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include "ggml.h"
#include "llama.h"
#include "get-model.h"
// run `python3 gguf-py/tests/test_gguf.py` to generate test_writer.gguf file.
int main(int argc, char ** argv)
{
char* fname = get_model_or_exit(argc, argv);
struct gguf_context * ctx_gguf = NULL;
struct ggml_context * ctx_meta = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &ctx_meta,
};
ctx_gguf = gguf_init_from_file(fname, params);
if (!ctx_gguf) {
fprintf(stderr, "%s: failed to load model from %s\n", __func__, fname);
return 1;
}
int n_kv = gguf_get_n_kv(ctx_gguf);
for (int i = 0; i < n_kv; i++) {
const char * name = gguf_get_key(ctx_gguf, i);
// skip the subkeys.
if (name[0] == '.') { continue; }
const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
char * value = gguf_kv_to_c_str(ctx_gguf, i, name);
printf("key: %s, type: %s, value: %s\n", name, gguf_type_name(type), value);
free(value);
}
int k_id = gguf_find_key(ctx_gguf, "no_such_key");
assert(k_id == -1);
k_id = gguf_find_key(ctx_gguf, "tokenizer_config");
assert(k_id != -1);
const char * name = gguf_get_key(ctx_gguf, k_id);
assert(strcmp(name, "tokenizer_config") == 0);
enum gguf_type type = gguf_get_kv_type(ctx_gguf, k_id);
assert(type == GGUF_TYPE_OBJ);
char * value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "{\"bos_token\":\"bos\", \"add_bos_token\":true}") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "dict1");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "{\"key1\":2, \"key2\":\"hi\", \"obj\":{\"k\":1}}") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "oArray");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[{\"k\":4, \"o\":{\"o1\":6}}, {\"k\":9}]") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "cArray");
assert(k_id != -1);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[3, \"hi\", [1, 2]]") == 0);
free(value);
k_id = gguf_find_key(ctx_gguf, "arrayInArray");
assert(k_id != -1);
type = gguf_get_kv_type(ctx_gguf, k_id);
assert(type == GGUF_TYPE_ARRAY);
value = gguf_kv_to_c_str(ctx_gguf, k_id, NULL);
assert(strcmp(value, "[[2, 3, 4], [5, 7, 8]]") == 0);
free(value);
printf("Done!\n");
}