Move conversion functions to common header
Add ggml_ prefix to their names.
This commit is contained in:
parent
1d01548627
commit
a9402ba2b6
4 changed files with 109 additions and 147 deletions
|
@ -28,6 +28,14 @@
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__gnu_linux__)
|
||||||
|
#include <endian.h>
|
||||||
|
#else // defined(__gnu_linux__)
|
||||||
|
#define le64toh(x) (x)
|
||||||
|
#define le32toh(x) (x)
|
||||||
|
#define le16toh(x) (x)
|
||||||
|
#endif // defined(__gnu_linux__)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@ -553,6 +561,31 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
|
||||||
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
|
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
|
||||||
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
|
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
|
||||||
|
|
||||||
|
// endianness conversion
//
// ggml_convert_from_le16/32/64(ptr) take a pointer to a 16/32/64-bit value whose
// bytes are in little-endian order and rewrite it in place in host byte order.
// ggml_convert_to_le16/32/64 are the same operation: swapping is its own inverse.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// Host order already is little-endian: nothing to convert, only mark the argument as used.
#define ggml_convert_from_le16(x) GGML_UNUSED(x)
#define ggml_convert_from_le32(x) GGML_UNUSED(x)
#define ggml_convert_from_le64(x) GGML_UNUSED(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// Reverse `size` bytes starting at `value`, in place.
// The bytes are accessed as unsigned char on purpose:
//  - it does not depend on le16toh/le32toh/le64toh, which are defined as identity
//    macros when <endian.h> is not available (non __gnu_linux__ builds), so the
//    conversion would silently do nothing on such big-endian hosts;
//  - character access is valid for any object type and alignment, unlike reading
//    the value through a casted uint16_t/uint32_t/uint64_t pointer.
static inline void ggml_reverse_bytes(void * value, unsigned int size) {
    unsigned char * bytes = (unsigned char *) value;
    for (unsigned int lo = 0, hi = size - 1; lo < hi; ++lo, --hi) {
        const unsigned char tmp = bytes[lo];
        bytes[lo] = bytes[hi];
        bytes[hi] = tmp;
    }
}

// Convert the 16-bit little-endian value at `value` to host order, in place.
static inline void ggml_convert_from_le16(void * value) {
    ggml_reverse_bytes(value, 2);
}

// Convert the 32-bit little-endian value at `value` to host order, in place.
static inline void ggml_convert_from_le32(void * value) {
    ggml_reverse_bytes(value, 4);
}

// Convert the 64-bit little-endian value at `value` to host order, in place.
static inline void ggml_convert_from_le64(void * value) {
    ggml_reverse_bytes(value, 8);
}
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#error Unexpected or undefined __BYTE_ORDER__
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

// host -> little-endian uses the very same byte reversal (or no-op)
#define ggml_convert_to_le16(x) ggml_convert_from_le16(x)
#define ggml_convert_to_le32(x) ggml_convert_from_le32(x)
#define ggml_convert_to_le64(x) ggml_convert_from_le64(x)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -560,6 +593,38 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
// endianness conversion (C++ only, size-generic front end)
//
// ggml_convert_from_le(ptr) picks the fixed-width helper that matches sizeof(*ptr)
// and applies it to the pointed-to value in place. ggml_convert_to_le is an alias
// for the same operation.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// little-endian host: no conversion, the argument is only marked as used
#define ggml_convert_from_le(x) GGML_UNUSED(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <type_traits>

// 1-byte values have no byte order; intentionally a no-op
template <typename T, typename std::enable_if<sizeof(T) == 1, int>::type = 0>
static inline void ggml_convert_from_le(T * value) {
    GGML_UNUSED(value);
}

// 2-byte values are forwarded to the 16-bit helper
template <typename T, typename std::enable_if<sizeof(T) == 2, int>::type = 0>
static inline void ggml_convert_from_le(T * value) {
    ggml_convert_from_le16(value);
}

// 4-byte values are forwarded to the 32-bit helper
template <typename T, typename std::enable_if<sizeof(T) == 4, int>::type = 0>
static inline void ggml_convert_from_le(T * value) {
    ggml_convert_from_le32(value);
}

// 8-byte values are forwarded to the 64-bit helper
template <typename T, typename std::enable_if<sizeof(T) == 8, int>::type = 0>
static inline void ggml_convert_from_le(T * value) {
    ggml_convert_from_le64(value);
}
#else // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#error Unexpected or undefined __BYTE_ORDER__
#endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

// the conversion is symmetric, so writing uses the same routine as reading
#define ggml_convert_to_le(x) ggml_convert_from_le(x)
|
||||||
|
|
||||||
// expose GGUF internals for test code
|
// expose GGUF internals for test code
|
||||||
GGML_API size_t gguf_type_size(enum gguf_type type);
|
GGML_API size_t gguf_type_size(enum gguf_type type);
|
||||||
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
|
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
|
||||||
|
|
101
ggml/src/ggml.c
101
ggml/src/ggml.c
|
@ -36,35 +36,6 @@
|
||||||
#include <syscall.h>
|
#include <syscall.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
#include <endian.h>
|
|
||||||
#else
|
|
||||||
#define le64toh(x) (x)
|
|
||||||
#define le32toh(x) (x)
|
|
||||||
#define le16toh(x) (x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// endianness conversion
|
|
||||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
||||||
#define convert_from_le16(x) UNUSED(x)
|
|
||||||
#define convert_from_le32(x) UNUSED(x)
|
|
||||||
#define convert_from_le64(x) UNUSED(x)
|
|
||||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
||||||
static inline void convert_from_le16(void * value) {
|
|
||||||
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void convert_from_le32(void * value) {
|
|
||||||
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void convert_from_le64(void * value) {
|
|
||||||
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
#error Unexpected or undefined __BYTE_ORDER__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <mach/mach.h>
|
#include <mach/mach.h>
|
||||||
|
@ -6593,113 +6564,113 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
|
||||||
static void ggml_byteswap_i16(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_i16(void * restrict buffer, size_t elements) {
|
||||||
uint16_t *data_ptr = (uint16_t*) buffer;
|
uint16_t *data_ptr = (uint16_t*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(data_ptr + i);
|
ggml_convert_from_le16(data_ptr + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_i32(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_i32(void * restrict buffer, size_t elements) {
|
||||||
uint32_t *data_ptr = (uint32_t*) buffer;
|
uint32_t *data_ptr = (uint32_t*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le32(data_ptr + i);
|
ggml_convert_from_le32(data_ptr + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_i64(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_i64(void * restrict buffer, size_t elements) {
|
||||||
uint64_t *data_ptr = (uint64_t*) buffer;
|
uint64_t *data_ptr = (uint64_t*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le64(data_ptr + i);
|
ggml_convert_from_le64(data_ptr + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) {
|
||||||
block_q4_0 *data_ptr = (block_q4_0*) buffer;
|
block_q4_0 *data_ptr = (block_q4_0*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) {
|
||||||
block_q4_1 *data_ptr = (block_q4_1*) buffer;
|
block_q4_1 *data_ptr = (block_q4_1*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].m));
|
ggml_convert_from_le16(&(data_ptr[i].m));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) {
|
||||||
block_q5_0 *data_ptr = (block_q5_0*) buffer;
|
block_q5_0 *data_ptr = (block_q5_0*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) {
|
||||||
block_q5_1 *data_ptr = (block_q5_1*) buffer;
|
block_q5_1 *data_ptr = (block_q5_1*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].m));
|
ggml_convert_from_le16(&(data_ptr[i].m));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) {
|
||||||
block_q8_0 *data_ptr = (block_q8_0*) buffer;
|
block_q8_0 *data_ptr = (block_q8_0*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) {
|
||||||
block_q8_1 *data_ptr = (block_q8_1*) buffer;
|
block_q8_1 *data_ptr = (block_q8_1*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].s));
|
ggml_convert_from_le16(&(data_ptr[i].s));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) {
|
||||||
block_q2_K *data_ptr = (block_q2_K*) buffer;
|
block_q2_K *data_ptr = (block_q2_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].dmin));
|
ggml_convert_from_le16(&(data_ptr[i].dmin));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) {
|
||||||
block_q3_K *data_ptr = (block_q3_K*) buffer;
|
block_q3_K *data_ptr = (block_q3_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) {
|
||||||
block_q4_K *data_ptr = (block_q4_K*) buffer;
|
block_q4_K *data_ptr = (block_q4_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].dmin));
|
ggml_convert_from_le16(&(data_ptr[i].dmin));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) {
|
||||||
block_q5_K *data_ptr = (block_q5_K*) buffer;
|
block_q5_K *data_ptr = (block_q5_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].dmin));
|
ggml_convert_from_le16(&(data_ptr[i].dmin));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) {
|
||||||
block_q6_K *data_ptr = (block_q6_K*) buffer;
|
block_q6_K *data_ptr = (block_q6_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
|
||||||
block_iq2_xxs *data_ptr = (block_iq2_xxs*) buffer;
|
block_iq2_xxs *data_ptr = (block_iq2_xxs*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
for (size_t j = 0; j < QK_K/8; ++j) {
|
for (size_t j = 0; j < QK_K/8; ++j) {
|
||||||
convert_from_le16(&(data_ptr[i].qs[j]));
|
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6707,9 +6678,9 @@ static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
|
||||||
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
|
||||||
block_iq2_xs *data_ptr = (block_iq2_xs*) buffer;
|
block_iq2_xs *data_ptr = (block_iq2_xs*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
for (size_t j = 0; j < QK_K/8; ++j) {
|
for (size_t j = 0; j < QK_K/8; ++j) {
|
||||||
convert_from_le16(&(data_ptr[i].qs[j]));
|
ggml_convert_from_le16(&(data_ptr[i].qs[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6717,30 +6688,30 @@ static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
|
||||||
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) {
|
||||||
block_iq3_xxs *data_ptr = (block_iq3_xxs*) buffer;
|
block_iq3_xxs *data_ptr = (block_iq3_xxs*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) {
|
||||||
block_iq3_s *data_ptr = (block_iq3_s*) buffer;
|
block_iq3_s *data_ptr = (block_iq3_s*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) {
|
||||||
block_iq2_s *data_ptr = (block_iq2_s*) buffer;
|
block_iq2_s *data_ptr = (block_iq2_s*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
|
||||||
block_iq1_s *data_ptr = (block_iq1_s*) buffer;
|
block_iq1_s *data_ptr = (block_iq1_s*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
for (size_t j = 0; j < QK_K/32; ++j) {
|
for (size_t j = 0; j < QK_K/32; ++j) {
|
||||||
convert_from_le16(&(data_ptr[i].qh[j]));
|
ggml_convert_from_le16(&(data_ptr[i].qh[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6748,24 +6719,24 @@ static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
|
||||||
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) {
|
||||||
block_iq4_nl *data_ptr = (block_iq4_nl*) buffer;
|
block_iq4_nl *data_ptr = (block_iq4_nl*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) {
|
||||||
block_iq4_xs *data_ptr = (block_iq4_xs*) buffer;
|
block_iq4_xs *data_ptr = (block_iq4_xs*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
convert_from_le16(&(data_ptr[i].scales_h));
|
ggml_convert_from_le16(&(data_ptr[i].scales_h));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) {
|
||||||
block_q8_K *data_ptr = (block_q8_K*) buffer;
|
block_q8_K *data_ptr = (block_q8_K*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le32(&(data_ptr[i].d));
|
ggml_convert_from_le32(&(data_ptr[i].d));
|
||||||
for (size_t j = 0; j < QK_K/16; ++j) {
|
for (size_t j = 0; j < QK_K/16; ++j) {
|
||||||
convert_from_le16(&(data_ptr[i].bsums[j]));
|
ggml_convert_from_le16(&(data_ptr[i].bsums[j]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6791,13 +6762,13 @@ static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) {
|
||||||
static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) {
|
||||||
block_tq1_0 *data_ptr = (block_tq1_0*) buffer;
|
block_tq1_0 *data_ptr = (block_tq1_0*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) {
|
static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) {
|
||||||
block_tq2_0 *data_ptr = (block_tq2_0*) buffer;
|
block_tq2_0 *data_ptr = (block_tq2_0*) buffer;
|
||||||
for (size_t i = 0; i < elements; ++i) {
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
convert_from_le16(&(data_ptr[i].d));
|
ggml_convert_from_le16(&(data_ptr[i].d));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,43 +15,6 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
#include <endian.h>
|
|
||||||
#else
|
|
||||||
#define le64toh(x) (x)
|
|
||||||
#define le32toh(x) (x)
|
|
||||||
#define le16toh(x) (x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// endianness conversion
|
|
||||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
||||||
#define convert_from_le(x) (void)(x)
|
|
||||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
||||||
#include <type_traits>
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
|
|
||||||
static inline void convert_from_le(T * /*value*/)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
|
|
||||||
static inline void convert_from_le(T * value) {
|
|
||||||
*((uint16_t*)value) = le16toh(*((uint16_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
|
|
||||||
static inline void convert_from_le(T * value) {
|
|
||||||
*((uint32_t*)value) = le32toh(*((uint32_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
|
|
||||||
static inline void convert_from_le(T * value) {
|
|
||||||
*((uint64_t*)value) = le64toh(*((uint64_t*)value));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
#error Unexpected or undefined __BYTE_ORDER__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct type_to_gguf_type;
|
struct type_to_gguf_type;
|
||||||
|
|
||||||
|
@ -261,7 +224,7 @@ struct gguf_reader {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool read(T & dst) const {
|
bool read(T & dst) const {
|
||||||
auto res = fread(&dst, 1, sizeof(dst), file);
|
auto res = fread(&dst, 1, sizeof(dst), file);
|
||||||
convert_from_le(&dst);
|
ggml_convert_from_le(&dst);
|
||||||
return res == sizeof(dst);
|
return res == sizeof(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,43 +10,6 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
#include <endian.h>
|
|
||||||
#else
|
|
||||||
#define le64toh(x) (x)
|
|
||||||
#define le32toh(x) (x)
|
|
||||||
#define le16toh(x) (x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// endianness conversion
|
|
||||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
|
||||||
#define convert_to_le(x)
|
|
||||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
|
||||||
#include <type_traits>
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
|
|
||||||
static inline void convert_to_le(T * /*value*/)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
|
|
||||||
static inline void convert_to_le(T * value) {
|
|
||||||
*((uint16_t*)value) = htole16(*((uint16_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
|
|
||||||
static inline void convert_to_le(T * value) {
|
|
||||||
*((uint32_t*)value) = htole32(*((uint32_t*)value));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
|
|
||||||
static inline void convert_to_le(T * value) {
|
|
||||||
*((uint64_t*)value) = htole64(*((uint64_t*)value));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
#error Unexpected or undefined __BYTE_ORDER__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
constexpr int offset_has_kv = 1000;
|
constexpr int offset_has_kv = 1000;
|
||||||
constexpr int offset_has_tensors = 2000;
|
constexpr int offset_has_tensors = 2000;
|
||||||
constexpr int offset_has_data = 3000;
|
constexpr int offset_has_data = 3000;
|
||||||
|
@ -184,7 +147,7 @@ static std::vector<std::pair<enum gguf_type, enum gguf_type>> get_kv_types(std::
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void helper_write(FILE * file, T val) {
|
static void helper_write(FILE * file, T val) {
|
||||||
convert_to_le(&val);
|
ggml_convert_to_le(&val);
|
||||||
GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
|
GGML_ASSERT(fwrite(&val, 1, sizeof(val), file) == sizeof(val));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,7 +541,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
|
||||||
case GGUF_TYPE_UINT16:
|
case GGUF_TYPE_UINT16:
|
||||||
case GGUF_TYPE_INT16:
|
case GGUF_TYPE_INT16:
|
||||||
for (size_t j = 0; j < arr_n; ++j) {
|
for (size_t j = 0; j < arr_n; ++j) {
|
||||||
convert_to_le((uint16_t*)(data8 + j * 2));
|
ggml_convert_to_le((uint16_t*)(data8 + j * 2));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -586,7 +549,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
|
||||||
case GGUF_TYPE_INT32:
|
case GGUF_TYPE_INT32:
|
||||||
case GGUF_TYPE_FLOAT32:
|
case GGUF_TYPE_FLOAT32:
|
||||||
for (size_t j = 0; j < arr_n; ++j) {
|
for (size_t j = 0; j < arr_n; ++j) {
|
||||||
convert_to_le((uint32_t*)(data8 + j * 4));
|
ggml_convert_to_le((uint32_t*)(data8 + j * 4));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -594,7 +557,7 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
|
||||||
case GGUF_TYPE_INT64:
|
case GGUF_TYPE_INT64:
|
||||||
case GGUF_TYPE_FLOAT64:
|
case GGUF_TYPE_FLOAT64:
|
||||||
for (size_t j = 0; j < arr_n; ++j) {
|
for (size_t j = 0; j < arr_n; ++j) {
|
||||||
convert_to_le((uint64_t*)(data8 + j * 8));
|
ggml_convert_to_le((uint64_t*)(data8 + j * 8));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -619,19 +582,19 @@ static bool handcrafted_check_kv(const gguf_context * gguf_ctx, const unsigned i
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case GGUF_TYPE_UINT16:
|
case GGUF_TYPE_UINT16:
|
||||||
case GGUF_TYPE_INT16:
|
case GGUF_TYPE_INT16:
|
||||||
convert_to_le((uint16_t*)(data8));
|
ggml_convert_to_le((uint16_t*)(data8));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GGUF_TYPE_UINT32:
|
case GGUF_TYPE_UINT32:
|
||||||
case GGUF_TYPE_INT32:
|
case GGUF_TYPE_INT32:
|
||||||
case GGUF_TYPE_FLOAT32:
|
case GGUF_TYPE_FLOAT32:
|
||||||
convert_to_le((uint32_t*)(data8));
|
ggml_convert_to_le((uint32_t*)(data8));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GGUF_TYPE_UINT64:
|
case GGUF_TYPE_UINT64:
|
||||||
case GGUF_TYPE_INT64:
|
case GGUF_TYPE_INT64:
|
||||||
case GGUF_TYPE_FLOAT64:
|
case GGUF_TYPE_FLOAT64:
|
||||||
convert_to_le((uint64_t*)(data8));
|
ggml_convert_to_le((uint64_t*)(data8));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue