From 27c19c4eb767666c0cad818133a21cb0271c7c1f Mon Sep 17 00:00:00 2001
From: Aleksei Nikiforov
Date: Fri, 10 Jan 2025 12:19:26 +0100
Subject: [PATCH] Implement write byteswap for tests

---
Notes (not part of the commit message; ignored by `git am`):

On big-endian hosts this patch changes what gguf_writer emits:
  * gguf_writer::write<T>() pushes the bytes of `val` in reverse order;
  * numeric KV values are written element by element through write<T>()
    instead of through write(kv.data);
  * after tensor data has been copied into `buf`, the `byteswap` callback
    from the tensor's type traits is invoked on it when that callback is
    not null.
tests/test-gguf.cpp converts handcrafted values with convert_to_le() before
writing them and before comparing the expected bytes with `data_gguf`.

NOTE(review): this file was recovered from a copy in which line breaks were
collapsed and every `<...>` span had been stripped. Indentation, template
argument lists, `#include` targets and the truncated hunk section headers
were reconstructed, and the author's e-mail address is missing -- compare
against the upstream commit before applying.

 ggml/src/gguf.cpp   | 68 +++++++++++++++++++++++++++++++++
 tests/test-gguf.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 55de3b765..f7136b3f0 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -1184,7 +1184,13 @@ struct gguf_writer {
     template <typename T>
     void write(const T & val) const {
         for (size_t i = 0; i < sizeof(val); ++i) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
             buf.push_back(reinterpret_cast<const int8_t *>(&val)[i]);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+            buf.push_back(reinterpret_cast<const int8_t *>(&val)[sizeof(val) - i - 1]);
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
         }
     }
 
@@ -1233,6 +1239,7 @@ struct gguf_writer {
         }
 
         switch (kv.get_type()) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
             case GGUF_TYPE_UINT8:
             case GGUF_TYPE_INT8:
             case GGUF_TYPE_UINT16:
@@ -1245,6 +1252,60 @@ struct gguf_writer {
             case GGUF_TYPE_FLOAT64: {
                 write(kv.data);
             } break;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+            case GGUF_TYPE_UINT8: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<uint8_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_INT8: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<int8_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_UINT16: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<uint16_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_INT16: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<int16_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_UINT32: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<uint32_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_INT32: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<int32_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_FLOAT32: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<float>(i));
+                }
+            } break;
+            case GGUF_TYPE_UINT64: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<uint64_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_INT64: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<int64_t>(i));
+                }
+            } break;
+            case GGUF_TYPE_FLOAT64: {
+                for (size_t i = 0; i < ne; ++i) {
+                    write(kv.get_val<double>(i));
+                }
+            } break;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
             case GGUF_TYPE_BOOL: {
                 for (size_t i = 0; i < ne; ++i) {
                     write(kv.get_val<bool>(i));
@@ -1295,6 +1356,13 @@ struct gguf_writer {
             memcpy(buf.data() + offset, info.t.data, nbytes);
         }
 
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        auto byteswap = ggml_get_type_traits(info.t.type)->byteswap;
+        if (byteswap != nullptr) {
+            byteswap(buf.data() + offset, ggml_nelements(&(info.t)) / ggml_blck_size(info.t.type));
+        }
+#endif
+
         pad(alignment);
     }
 };
diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp
index 6ed696328..1dd391f77 100644
--- a/tests/test-gguf.cpp
+++ b/tests/test-gguf.cpp
@@ -10,6 +10,43 @@
 #include <string>
 #include <vector>
 
+#if defined(__gnu_linux__)
+#include <endian.h>
+#else
+#define le64toh(x) (x)
+#define le32toh(x) (x)
+#define le16toh(x) (x)
+#endif
+
+// endianness conversion
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define convert_to_le(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#include <type_traits>
+
+template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
+static inline void convert_to_le(T * /*value*/)
+{
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
+static inline void convert_to_le(T * value) {
+    *((uint16_t*)value) = htole16(*((uint16_t*)value));
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
+static inline void convert_to_le(T * value) {
+    *((uint32_t*)value) = htole32(*((uint32_t*)value));
+}
+
+template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
+static inline void convert_to_le(T * value) {
+    *((uint64_t*)value) = htole64(*((uint64_t*)value));
+}
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
+
 constexpr int offset_has_kv      = 1000;
 constexpr int offset_has_tensors = 2000;
 constexpr int offset_has_data    = 3000;
@@ -146,7 +183,8 @@ static std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::
 }
 
 template <typename T>
-static void helper_write(FILE * file, const T & val) {
+static void helper_write(FILE * file, T val) {
+    convert_to_le(&val);
     GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
 }
 
@@ -363,7 +401,9 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
                     helper_write(file, big_dim);
                 }
             } else {
-                helper_write(file, shape.data(), n_dims*sizeof(int64_t));
+                for (uint32_t j = 0; j < n_dims; ++j) {
+                    helper_write(file, shape[j]);
+                }
             }
 
             {
@@ -533,6 +573,33 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
                 continue;
             }
 
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+            switch (type_arr) {
+                case GGUF_TYPE_UINT16:
+                case GGUF_TYPE_INT16:
+                    for (size_t j = 0; j < arr_n; ++j) {
+                        convert_to_le((uint16_t*)(data8 + j * 2));
+                    }
+                    break;
+
+                case GGUF_TYPE_UINT32:
+                case GGUF_TYPE_INT32:
+                case GGUF_TYPE_FLOAT32:
+                    for (size_t j = 0; j < arr_n; ++j) {
+                        convert_to_le((uint32_t*)(data8 + j * 4));
+                    }
+                    break;
+
+                case GGUF_TYPE_UINT64:
+                case GGUF_TYPE_INT64:
+                case GGUF_TYPE_FLOAT64:
+                    for (size_t j = 0; j < arr_n; ++j) {
+                        convert_to_le((uint64_t*)(data8 + j * 8));
+                    }
+                    break;
+            }
+#endif
+
             if (!std::equal(data8, data8 + arr_n*type_size, data_gguf)) {
                 ok = false;
             }
@@ -548,6 +615,27 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
             continue;
         }
 
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        switch (type) {
+            case GGUF_TYPE_UINT16:
+            case GGUF_TYPE_INT16:
+                convert_to_le((uint16_t*)(data8));
+                break;
+
+            case GGUF_TYPE_UINT32:
+            case GGUF_TYPE_INT32:
+            case GGUF_TYPE_FLOAT32:
+                convert_to_le((uint32_t*)(data8));
+                break;
+
+            case GGUF_TYPE_UINT64:
+            case GGUF_TYPE_INT64:
+            case GGUF_TYPE_FLOAT64:
+                convert_to_le((uint64_t*)(data8));
+                break;
+        }
+#endif
+
         if (!std::equal(data8, data8 + gguf_type_size(type), data_gguf)) {
             ok = false;
         }