Move conversion functions to common header

Add ggml_ prefix to their names.
Aleksei Nikiforov 2025-01-15 11:48:26 +01:00
parent 1d01548627
commit a9402ba2b6
4 changed files with 109 additions and 147 deletions
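For context: the helpers being consolidated wrap the le*toh/htole* pattern used when loading GGUF data. Converting a little-endian on-disk value to host byte order is a no-op on little-endian machines and an in-place byte swap on big-endian ones. The standalone sketch below illustrates only that pattern; swap_bytes_u32 and convert_from_le32_sketch are placeholder names, not part of this commit.

#include <cstdint>
#include <cstdio>
#include <cstring>

// In-place little-endian -> host conversion for a 32-bit field, mirroring the
// behaviour of the ggml_convert_from_le32 helper introduced in this commit:
// a byte swap on big-endian hosts, a no-op on little-endian hosts.
static inline uint32_t swap_bytes_u32(uint32_t v) {
    return ((v & 0x000000FFu) << 24) |
           ((v & 0x0000FF00u) <<  8) |
           ((v & 0x00FF0000u) >>  8) |
           ((v & 0xFF000000u) >> 24);
}

static inline void convert_from_le32_sketch(void * value) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    uint32_t v;
    std::memcpy(&v, value, sizeof(v));
    v = swap_bytes_u32(v);
    std::memcpy(value, &v, sizeof(v));
#else
    (void) value; // little-endian host: the on-disk and host layouts match
#endif
}

int main() {
    const unsigned char buf[4] = {0x2A, 0x00, 0x00, 0x00}; // 42 encoded little-endian
    uint32_t v;
    std::memcpy(&v, buf, sizeof(v));
    convert_from_le32_sketch(&v);
    std::printf("value = %u\n", v); // prints 42 regardless of host endianness
    return 0;
}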

View file

@@ -28,6 +28,14 @@
#include <immintrin.h>
#endif
#if defined(__gnu_linux__)
#include <endian.h>
#else // defined(__gnu_linux__)
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif // defined(__gnu_linux__)
#ifdef __cplusplus
extern "C" {
#endif
@@ -553,6 +561,31 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
// endianness conversion
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define ggml_convert_from_le16(x) GGML_UNUSED(x)
#define ggml_convert_from_le32(x) GGML_UNUSED(x)
#define ggml_convert_from_le64(x) GGML_UNUSED(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static inline void ggml_convert_from_le16(void * value) {
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
}
static inline void ggml_convert_from_le32(void * value) {
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
}
static inline void ggml_convert_from_le64(void * value) {
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
}
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#error Unexpected or undefined __BYTE_ORDER__
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define ggml_convert_to_le16(x) ggml_convert_from_le16(x)
#define ggml_convert_to_le32(x) ggml_convert_from_le32(x)
#define ggml_convert_to_le64(x) ggml_convert_from_le64(x)
#ifdef __cplusplus
}
#endif
@@ -560,6 +593,38 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#ifdef __cplusplus
#include <vector>
// endianness conversion
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define ggml_convert_from_le(x) GGML_UNUSED(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <type_traits>
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
static inline void ggml_convert_from_le(T * value)
{
GGML_UNUSED(value);
}
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
static inline void ggml_convert_from_le(T * value) {
ggml_convert_from_le16(value);
}
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
static inline void ggml_convert_from_le(T * value) {
ggml_convert_from_le32(value);
}
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
static inline void ggml_convert_from_le(T * value) {
ggml_convert_from_le64(value);
}
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#error Unexpected or undefined __BYTE_ORDER__
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define ggml_convert_to_le(x) ggml_convert_from_le(x)
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);

View file

@@ -36,35 +36,6 @@
#include <syscall.h>
#endif
#if defined(__gnu_linux__)
#include <endian.h>
#else
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif
// endianness conversion
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define convert_from_le16(x) UNUSED(x)
#define convert_from_le32(x) UNUSED(x)
#define convert_from_le64(x) UNUSED(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static inline void convert_from_le16(void * value) {
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
}
static inline void convert_from_le32(void * value) {
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
}
static inline void convert_from_le64(void * value) {
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
}
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
#if defined(__APPLE__)
#include <unistd.h>
#include <mach/mach.h>
@@ -6593,113 +6564,113 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
static void ggml_byteswap_i16(void * restrict buffer, size_t elements) {
uint16_t *data_ptr = (uint16_t*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(data_ptr + i);
ggml_convert_from_le16(data_ptr + i);
}
}
static void ggml_byteswap_i32(void * restrict buffer, size_t elements) {
uint32_t *data_ptr = (uint32_t*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le32(data_ptr + i);
ggml_convert_from_le32(data_ptr + i);
}
}
static void ggml_byteswap_i64(void * restrict buffer, size_t elements) {
uint64_t *data_ptr = (uint64_t*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le64(data_ptr + i);
ggml_convert_from_le64(data_ptr + i);
}
}
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) {
block_q4_0 *data_ptr = (block_q4_0*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) {
block_q4_1 *data_ptr = (block_q4_1*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].m));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].m));
}
}
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) {
block_q5_0 *data_ptr = (block_q5_0*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) {
block_q5_1 *data_ptr = (block_q5_1*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].m));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].m));
}
}
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) {
block_q8_0 *data_ptr = (block_q8_0*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) {
block_q8_1 *data_ptr = (block_q8_1*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].s));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].s));
}
}
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) {
block_q2_K *data_ptr = (block_q2_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].dmin));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].dmin));
}
}
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) {
block_q3_K *data_ptr = (block_q3_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) {
block_q4_K *data_ptr = (block_q4_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].dmin));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].dmin));
}
}
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) {
block_q5_K *data_ptr = (block_q5_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].dmin));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].dmin));
}
}
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) {
block_q6_K *data_ptr = (block_q6_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
block_iq2_xxs *data_ptr = (block_iq2_xxs*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
for (size_t j = 0; j < QK_K/8; ++j) {
convert_from_le16(&(data_ptr[i].qs[j]));
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
}
}
}
@@ -6707,9 +6678,9 @@ static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
block_iq2_xs *data_ptr = (block_iq2_xs*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
for (size_t j = 0; j < QK_K/8; ++j) {
convert_from_le16(&(data_ptr[i].qs[j]));
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
}
}
}
@@ -6717,30 +6688,30 @@ static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) {
block_iq3_xxs *data_ptr = (block_iq3_xxs*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) {
block_iq3_s *data_ptr = (block_iq3_s*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) {
block_iq2_s *data_ptr = (block_iq2_s*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
block_iq1_s *data_ptr = (block_iq1_s*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
for (size_t j = 0; j < QK_K/32; ++j) {
convert_from_le16(&(data_ptr[i].qh[j]));
ggml_convert_from_le16(&(data_ptr[i].qh[j]));
}
}
}
@@ -6748,24 +6719,24 @@ static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) {
block_iq4_nl *data_ptr = (block_iq4_nl*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) {
block_iq4_xs *data_ptr = (block_iq4_xs*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
convert_from_le16(&(data_ptr[i].scales_h));
ggml_convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].scales_h));
}
}
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) {
block_q8_K *data_ptr = (block_q8_K*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le32(&(data_ptr[i].d));
ggml_convert_from_le32(&(data_ptr[i].d));
for (size_t j = 0; j < QK_K/16; ++j) {
convert_from_le16(&(data_ptr[i].bsums[j]));
ggml_convert_from_le16(&(data_ptr[i].bsums[j]));
}
}
}
@@ -6791,13 +6762,13 @@ static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) {
static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) {
block_tq1_0 *data_ptr = (block_tq1_0*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}
static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) {
block_tq2_0 *data_ptr = (block_tq2_0*) buffer;
for (size_t i = 0; i < elements; ++i) {
convert_from_le16(&(data_ptr[i].d));
ggml_convert_from_le16(&(data_ptr[i].d));
}
}

View file

@@ -15,43 +15,6 @@
#include <string>
#include <vector>
#if defined(__gnu_linux__)
#include <endian.h>
#else
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif
// endianness conversion
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define convert_from_le(x) (void)(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <type_traits>
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
static inline void convert_from_le(T * /*value*/)
{
}
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
static inline void convert_from_le(T * value) {
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
}
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
static inline void convert_from_le(T * value) {
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
}
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
static inline void convert_from_le(T * value) {
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
}
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
template <typename T>
struct type_to_gguf_type;
@@ -261,7 +224,7 @@ struct gguf_reader {
template <typename T>
bool read(T & dst) const {
auto res = fread(&dst, 1, sizeof(dst), file);
convert_from_le(&dst);
ggml_convert_from_le(&dst);
return res == sizeof(dst);
}
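The templated C++ wrapper added to the common header dispatches on sizeof(T), so the single ggml_convert_from_le(&dst) call in gguf_reader::read above covers 1-, 2-, 4- and 8-byte fields alike. The sketch below re-creates that size-based dispatch plus a read-style helper over an in-memory buffer, purely for illustration: from_le_sketch and read_le_sketch are invented names, and the real reader works on a FILE* as shown in the diff.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <type_traits>

// Overload selected by sizeof(T); the 8-byte case follows the same pattern.
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
static void from_le_sketch(T * /*value*/) {} // single byte: nothing to swap

template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
static void from_le_sketch(T * value) {
    uint16_t v;
    std::memcpy(&v, value, sizeof(v));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    v = (uint16_t) ((v << 8) | (v >> 8));
#endif
    std::memcpy(value, &v, sizeof(v));
}

template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
static void from_le_sketch(T * value) {
    uint32_t v;
    std::memcpy(&v, value, sizeof(v));
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    v = __builtin_bswap32(v); // GCC/Clang builtin
#endif
    std::memcpy(value, &v, sizeof(v));
}

// gguf_reader::read-style helper: copy sizeof(T) raw bytes, then fix the
// byte order in place. No error handling, since the buffer is in memory.
template <typename T>
static bool read_le_sketch(const unsigned char * buf, std::size_t & off, T & dst) {
    std::memcpy(&dst, buf + off, sizeof(dst));
    off += sizeof(dst);
    from_le_sketch(&dst);
    return true;
}

int main() {
    const unsigned char stream[] = {0x02, 0x00, 0x2A, 0x00, 0x00, 0x00}; // u16 = 2, u32 = 42 (LE)
    std::size_t off = 0;
    uint16_t a;
    uint32_t b;
    read_le_sketch(stream, off, a);
    read_le_sketch(stream, off, b);
    std::printf("a = %u, b = %u\n", (unsigned) a, (unsigned) b); // a = 2, b = 42 on any host
    return 0;
}

Dispatching on sizeof(T) rather than on the concrete type keeps one call site valid for integral and floating-point fields of the same width, which is why the read and write paths in the test changes below can stay generic.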

View file

@@ -10,43 +10,6 @@
#include <string>
#include <vector>
#if defined(__gnu_linux__)
#include <endian.h>
#else
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif
// endianness conversion
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define convert_to_le(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <type_traits>
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
static inline void convert_to_le(T * /*value*/)
{
}
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
static inline void convert_to_le(T * value) {
*((uint16_t*)value) = htole16(*((uint16_t*)value));
}
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
static inline void convert_to_le(T * value) {
*((uint32_t*)value) = htole32(*((uint32_t*)value));
}
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
static inline void convert_to_le(T * value) {
*((uint64_t*)value) = htole64(*((uint64_t*)value));
}
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
constexpr int offset_has_kv = 1000;
constexpr int offset_has_tensors = 2000;
constexpr int offset_has_data = 3000;
@@ -184,7 +147,7 @@ static std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::
template <typename T>
static void helper_write(FILE * file, T val) {
convert_to_le(&val);
ggml_convert_to_le(&val);
GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
}
@@ -578,7 +541,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
case GGUF_TYPE_UINT16:
case GGUF_TYPE_INT16:
for (size_t j = 0; j < arr_n; ++j) {
convert_to_le((uint16_t*)(data8 + j * 2));
ggml_convert_to_le((uint16_t*)(data8 + j * 2));
}
break;
@@ -586,7 +549,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
for (size_t j = 0; j < arr_n; ++j) {
convert_to_le((uint32_t*)(data8 + j * 4));
ggml_convert_to_le((uint32_t*)(data8 + j * 4));
}
break;
@@ -594,7 +557,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
case GGUF_TYPE_INT64:
case GGUF_TYPE_FLOAT64:
for (size_t j = 0; j < arr_n; ++j) {
convert_to_le((uint64_t*)(data8 + j * 8));
ggml_convert_to_le((uint64_t*)(data8 + j * 8));
}
break;
}
@@ -619,19 +582,19 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
switch (type) {
case GGUF_TYPE_UINT16:
case GGUF_TYPE_INT16:
convert_to_le((uint16_t*)(data8));
ggml_convert_to_le((uint16_t*)(data8));
break;
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
convert_to_le((uint32_t*)(data8));
ggml_convert_to_le((uint32_t*)(data8));
break;
case GGUF_TYPE_UINT64:
case GGUF_TYPE_INT64:
case GGUF_TYPE_FLOAT64:
convert_to_le((uint64_t*)(data8));
ggml_convert_to_le((uint64_t*)(data8));
break;
}
#endif