From a9402ba2b6553b25c7abaac416598673692987d9 Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov Date: Wed, 15 Jan 2025 11:48:26 +0100 Subject: [PATCH] Move conversion functions to common header Add ggml_ prefix to their names. --- ggml/src/ggml-impl.h | 65 ++++++++++++++++++++++++++++ ggml/src/ggml.c | 101 +++++++++++++++---------------------------- ggml/src/gguf.cpp | 39 +---------------- tests/test-gguf.cpp | 51 +++------------------- 4 files changed, 109 insertions(+), 147 deletions(-) diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index eab017889..1a1056f7c 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -28,6 +28,14 @@ #include #endif +#if defined(__gnu_linux__) +#include +#else // defined(__gnu_linux__) +#define le64toh(x) (x) +#define le32toh(x) (x) +#define le16toh(x) (x) +#endif // defined(__gnu_linux__) + #ifdef __cplusplus extern "C" { #endif @@ -553,6 +561,31 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x) #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x) +// endianness conversion +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define ggml_convert_from_le16(x) GGML_UNUSED(x) +#define ggml_convert_from_le32(x) GGML_UNUSED(x) +#define ggml_convert_from_le64(x) GGML_UNUSED(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +static inline void ggml_convert_from_le16(void * value) { + *((uint16_t*)value) = le16toh(*((uint16_t*)value)); +} + +static inline void ggml_convert_from_le32(void * value) { + *((uint32_t*)value) = le32toh(*((uint32_t*)value)); +} + +static inline void ggml_convert_from_le64(void * value) { + *((uint64_t*)value) = le64toh(*((uint64_t*)value)); +} +#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#error Unexpected or undefined __BYTE_ORDER__ +#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +#define ggml_convert_to_le16(x) ggml_convert_from_le16(x) +#define ggml_convert_to_le32(x) ggml_convert_from_le32(x) +#define ggml_convert_to_le64(x) ggml_convert_from_le64(x) + #ifdef __cplusplus } #endif @@ -560,6 +593,38 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { #ifdef __cplusplus #include +// endianness conversion +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define ggml_convert_from_le(x) GGML_UNUSED(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#include + +template = 0> +static inline void ggml_convert_from_le(T * value) +{ + GGML_UNUSED(value); +} + +template = 0> +static inline void ggml_convert_from_le(T * value) { + ggml_convert_from_le16(value); +} + +template = 0> +static inline void ggml_convert_from_le(T * value) { + ggml_convert_from_le32(value); +} + +template = 0> +static inline void ggml_convert_from_le(T * value) { + ggml_convert_from_le64(value); +} +#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#error Unexpected or undefined __BYTE_ORDER__ +#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +#define ggml_convert_to_le(x) ggml_convert_from_le(x) + // expose GGUF internals for test code GGML_API size_t gguf_type_size(enum gguf_type type); GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index d017bb4f9..02ee731f3 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -36,35 +36,6 @@ #include #endif -#if defined(__gnu_linux__) -#include -#else -#define le64toh(x) (x) -#define le32toh(x) (x) -#define le16toh(x) (x) -#endif - -// endianness conversion -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define convert_from_le16(x) UNUSED(x) -#define convert_from_le32(x) UNUSED(x) -#define convert_from_le64(x) UNUSED(x) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -static inline void convert_from_le16(void * value) { - *((uint16_t*)value) = le16toh(*((uint16_t*)value)); -} - -static inline void convert_from_le32(void * value) { - *((uint32_t*)value) = le32toh(*((uint32_t*)value)); -} - -static inline void convert_from_le64(void * value) { - *((uint64_t*)value) = le64toh(*((uint64_t*)value)); -} -#else -#error Unexpected or undefined __BYTE_ORDER__ -#endif - #if defined(__APPLE__) #include #include @@ -6593,113 +6564,113 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons static void ggml_byteswap_i16(void * restrict buffer, size_t elements) { uint16_t *data_ptr = (uint16_t*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(data_ptr + i); + ggml_convert_from_le16(data_ptr + i); } } static void ggml_byteswap_i32(void * restrict buffer, size_t elements) { uint32_t *data_ptr = (uint32_t*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le32(data_ptr + i); + ggml_convert_from_le32(data_ptr + i); } } static void ggml_byteswap_i64(void * restrict buffer, size_t elements) { uint64_t *data_ptr = (uint64_t*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le64(data_ptr + i); + ggml_convert_from_le64(data_ptr + i); } } static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) { block_q4_0 *data_ptr = (block_q4_0*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) { block_q4_1 *data_ptr = (block_q4_1*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].m)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].m)); } } static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) { block_q5_0 *data_ptr = (block_q5_0*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) { block_q5_1 *data_ptr = (block_q5_1*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].m)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].m)); } } static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) { block_q8_0 *data_ptr = (block_q8_0*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) { block_q8_1 *data_ptr = (block_q8_1*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].s)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].s)); } } static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) { block_q2_K *data_ptr = (block_q2_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].dmin)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].dmin)); } } static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) { block_q3_K *data_ptr = (block_q3_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) { block_q4_K *data_ptr = (block_q4_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].dmin)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].dmin)); } } static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) { block_q5_K *data_ptr = (block_q5_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].dmin)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].dmin)); } } static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) { block_q6_K *data_ptr = (block_q6_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) { block_iq2_xxs *data_ptr = (block_iq2_xxs*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); for (size_t j = 0; j < QK_K/8; ++j) { - convert_from_le16(&(data_ptr[i].qs[j])); + ggml_convert_from_le16(&(data_ptr[i].qs[j])); } } } @@ -6707,9 +6678,9 @@ static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) { static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) { block_iq2_xs *data_ptr = (block_iq2_xs*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); for (size_t j = 0; j < QK_K/8; ++j) { - convert_from_le16(&(data_ptr[i].qs[j])); + ggml_convert_from_le16(&(data_ptr[i].qs[j])); } } } @@ -6717,30 +6688,30 @@ static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) { static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) { block_iq3_xxs *data_ptr = (block_iq3_xxs*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) { block_iq3_s *data_ptr = (block_iq3_s*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) { block_iq2_s *data_ptr = (block_iq2_s*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) { block_iq1_s *data_ptr = (block_iq1_s*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); for (size_t j = 0; j < QK_K/32; ++j) { - convert_from_le16(&(data_ptr[i].qh[j])); + ggml_convert_from_le16(&(data_ptr[i].qh[j])); } } } @@ -6748,24 +6719,24 @@ static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) { static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) { block_iq4_nl *data_ptr = (block_iq4_nl*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) { block_iq4_xs *data_ptr = (block_iq4_xs*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); - convert_from_le16(&(data_ptr[i].scales_h)); + ggml_convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].scales_h)); } } static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) { block_q8_K *data_ptr = (block_q8_K*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le32(&(data_ptr[i].d)); + ggml_convert_from_le32(&(data_ptr[i].d)); for (size_t j = 0; j < QK_K/16; ++j) { - convert_from_le16(&(data_ptr[i].bsums[j])); + ggml_convert_from_le16(&(data_ptr[i].bsums[j])); } } } @@ -6791,13 +6762,13 @@ static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) { static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) { block_tq1_0 *data_ptr = (block_tq1_0*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) { block_tq2_0 *data_ptr = (block_tq2_0*) buffer; for (size_t i = 0; i < elements; ++i) { - convert_from_le16(&(data_ptr[i].d)); + ggml_convert_from_le16(&(data_ptr[i].d)); } } diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index 5dd47c067..a3d99ab16 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -15,43 +15,6 @@ #include #include -#if defined(__gnu_linux__) -#include -#else -#define le64toh(x) (x) -#define le32toh(x) (x) -#define le16toh(x) (x) -#endif - -// endianness conversion -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define convert_from_le(x) (void)(x) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#include - -template = 0> -static inline void convert_from_le(T * /*value*/) -{ -} - -template = 0> -static inline void convert_from_le(T * value) { - *((uint16_t*)value) = le16toh(*((uint16_t*)value)); -} - -template = 0> -static inline void convert_from_le(T * value) { - *((uint32_t*)value) = le32toh(*((uint32_t*)value)); -} - -template = 0> -static inline void convert_from_le(T * value) { - *((uint64_t*)value) = le64toh(*((uint64_t*)value)); -} -#else -#error Unexpected or undefined __BYTE_ORDER__ -#endif - template struct type_to_gguf_type; @@ -261,7 +224,7 @@ struct gguf_reader { template bool read(T & dst) const { auto res = fread(&dst, 1, sizeof(dst), file); - convert_from_le(&dst); + ggml_convert_from_le(&dst); return res == sizeof(dst); } diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp index 1dd391f77..86a0cdb44 100644 --- a/tests/test-gguf.cpp +++ b/tests/test-gguf.cpp @@ -10,43 +10,6 @@ #include #include -#if defined(__gnu_linux__) -#include -#else -#define le64toh(x) (x) -#define le32toh(x) (x) -#define le16toh(x) (x) -#endif - -// endianness conversion -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define convert_to_le(x) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#include - -template = 0> -static inline void convert_to_le(T * /*value*/) -{ -} - -template = 0> -static inline void convert_to_le(T * value) { - *((uint16_t*)value) = htole16(*((uint16_t*)value)); -} - -template = 0> -static inline void convert_to_le(T * value) { - *((uint32_t*)value) = htole32(*((uint32_t*)value)); -} - -template = 0> -static inline void convert_to_le(T * value) { - *((uint64_t*)value) = htole64(*((uint64_t*)value)); -} -#else -#error Unexpected or undefined __BYTE_ORDER__ -#endif - constexpr int offset_has_kv = 1000; constexpr int offset_has_tensors = 2000; constexpr int offset_has_data = 3000; @@ -184,7 +147,7 @@ static std::vector> get_kv_types(std:: template static void helper_write(FILE * file, T val) { - convert_to_le(&val); + ggml_convert_to_le(&val); GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val)); } @@ -578,7 +541,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i case GGUF_TYPE_UINT16: case GGUF_TYPE_INT16: for (size_t j = 0; j < arr_n; ++j) { - convert_to_le((uint16_t*)(data8 + j * 2)); + ggml_convert_to_le((uint16_t*)(data8 + j * 2)); } break; @@ -586,7 +549,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i case GGUF_TYPE_INT32: case GGUF_TYPE_FLOAT32: for (size_t j = 0; j < arr_n; ++j) { - convert_to_le((uint32_t*)(data8 + j * 4)); + ggml_convert_to_le((uint32_t*)(data8 + j * 4)); } break; @@ -594,7 +557,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i case GGUF_TYPE_INT64: case GGUF_TYPE_FLOAT64: for (size_t j = 0; j < arr_n; ++j) { - convert_to_le((uint64_t*)(data8 + j * 8)); + ggml_convert_to_le((uint64_t*)(data8 + j * 8)); } break; } @@ -619,19 +582,19 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i switch (type) { case GGUF_TYPE_UINT16: case GGUF_TYPE_INT16: - convert_to_le((uint16_t*)(data8)); + ggml_convert_to_le((uint16_t*)(data8)); break; case GGUF_TYPE_UINT32: case GGUF_TYPE_INT32: case GGUF_TYPE_FLOAT32: - convert_to_le((uint32_t*)(data8)); + ggml_convert_to_le((uint32_t*)(data8)); break; case GGUF_TYPE_UINT64: case GGUF_TYPE_INT64: case GGUF_TYPE_FLOAT64: - convert_to_le((uint64_t*)(data8)); + ggml_convert_to_le((uint64_t*)(data8)); break; } #endif