Load little-endian models on s390x
Introduce per-type byteswap functions for ggml data and implement some of them (the remaining ones are stubs that assert). Currently tested on llama3.2.
This commit is contained in:
parent
96f4053934
commit
9a4b0df5e8
4 changed files with 332 additions and 1 deletions
|
@ -2144,6 +2144,7 @@ extern "C" {
|
||||||
#endif
|
#endif
|
||||||
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||||
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||||
|
// Per-type byte order fix-up: converts the data in `buffer` in place (the implementations
// in ggml.c convert from little-endian to host order). `elements` counts type-sized units;
// the model loader passes ggml_nelements / ggml_blck_size, i.e. blocks for quantized types.
typedef void (*ggml_byteswap_t) ( void * GGML_RESTRICT buffer, size_t elements);
|
||||||
|
|
||||||
struct ggml_type_traits {
|
struct ggml_type_traits {
|
||||||
const char * type_name;
|
const char * type_name;
|
||||||
|
@ -2153,6 +2154,7 @@ extern "C" {
|
||||||
bool is_quantized;
|
bool is_quantized;
|
||||||
ggml_to_float_t to_float;
|
ggml_to_float_t to_float;
|
||||||
ggml_from_float_t from_float_ref;
|
ggml_from_float_t from_float_ref;
|
||||||
|
ggml_byteswap_t byteswap;
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
||||||
|
|
265
ggml/src/ggml.c
265
ggml/src/ggml.c
|
@ -36,6 +36,44 @@
|
||||||
#include <syscall.h>
|
#include <syscall.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// le16toh / le32toh / le64toh: convert a little-endian value to host byte order.
#if defined(__gnu_linux__)
#include <endian.h>
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
// No <endian.h>, but the host is big-endian: the bytes really have to be reversed.
// An identity fallback here would silently leave the data in little-endian order.
#define le64toh(x) __builtin_bswap64(x)
#define le32toh(x) __builtin_bswap32(x)
#define le16toh(x) __builtin_bswap16(x)
#else
// Little-endian host without <endian.h>: the value is already in host order.
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif
|
||||||
|
|
||||||
|
// endianness conversion
//
// convert_from_leNN(ptr) rewrites, in place, the NN-bit value that `ptr` points to
// from little-endian byte order into host byte order.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// Little-endian host: nothing to convert. The argument is still consumed (cast to void)
// so that locals which only feed these calls do not trigger unused-variable warnings.
#define convert_from_le16(x) ((void)(x))
#define convert_from_le32(x) ((void)(x))
#define convert_from_le64(x) ((void)(x))
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// memcpy in and out keeps the access valid whatever the pointee's alignment or declared type.
static inline void convert_from_le16(void * value) {
    uint16_t temp;
    memcpy(&temp, value, sizeof(uint16_t));
    temp = le16toh(temp);
    memcpy(value, &temp, sizeof(uint16_t));
}

static inline void convert_from_le32(void * value) {
    uint32_t temp;
    memcpy(&temp, value, sizeof(uint32_t));
    temp = le32toh(temp);
    memcpy(value, &temp, sizeof(uint32_t));
}

static inline void convert_from_le64(void * value) {
    uint64_t temp;
    memcpy(&temp, value, sizeof(uint64_t));
    temp = le64toh(temp);
    memcpy(value, &temp, sizeof(uint64_t));
}
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <mach/mach.h>
|
#include <mach/mach.h>
|
||||||
|
@ -561,6 +599,34 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
|
||||||
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
||||||
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
||||||
|
|
||||||
|
// Byte order fix-up helpers referenced from the type_traits table below; each one is
// defined further down in this file.

// plain fixed-width values
static void ggml_byteswap_i16(void * restrict buffer, size_t elements);
static void ggml_byteswap_i32(void * restrict buffer, size_t elements);
static void ggml_byteswap_i64(void * restrict buffer, size_t elements);

// legacy quant blocks
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements);
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements);
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements);
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements);
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements);
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements);

// k-quant blocks
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements);
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements);
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements);
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements);
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements);

// i-quant blocks
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq1_m(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements);
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements);

// remaining block types
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements);
static void ggml_byteswap_q4_0_4x4(void * restrict buffer, size_t elements);
static void ggml_byteswap_q4_0_4x8(void * restrict buffer, size_t elements);
static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements);
|
||||||
|
|
||||||
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_I8] = {
|
[GGML_TYPE_I8] = {
|
||||||
.type_name = "i8",
|
.type_name = "i8",
|
||||||
|
@ -573,30 +639,35 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(int16_t),
|
.type_size = sizeof(int16_t),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_i16,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_I32] = {
|
[GGML_TYPE_I32] = {
|
||||||
.type_name = "i32",
|
.type_name = "i32",
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(int32_t),
|
.type_size = sizeof(int32_t),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_i32,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_I64] = {
|
[GGML_TYPE_I64] = {
|
||||||
.type_name = "i64",
|
.type_name = "i64",
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(int64_t),
|
.type_size = sizeof(int64_t),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_i64,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_F64] = {
|
[GGML_TYPE_F64] = {
|
||||||
.type_name = "f64",
|
.type_name = "f64",
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(double),
|
.type_size = sizeof(double),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_i64,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_F32] = {
|
[GGML_TYPE_F32] = {
|
||||||
.type_name = "f32",
|
.type_name = "f32",
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(float),
|
.type_size = sizeof(float),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_i32,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_F16] = {
|
[GGML_TYPE_F16] = {
|
||||||
.type_name = "f16",
|
.type_name = "f16",
|
||||||
|
@ -605,6 +676,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
|
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
|
||||||
.from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
.from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
||||||
|
.byteswap = ggml_byteswap_i16,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_0] = {
|
[GGML_TYPE_Q4_0] = {
|
||||||
.type_name = "q4_0",
|
.type_name = "q4_0",
|
||||||
|
@ -613,6 +685,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
|
||||||
|
.byteswap = ggml_byteswap_q4_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_1] = {
|
[GGML_TYPE_Q4_1] = {
|
||||||
.type_name = "q4_1",
|
.type_name = "q4_1",
|
||||||
|
@ -621,6 +694,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
|
||||||
|
.byteswap = ggml_byteswap_q4_1,
|
||||||
},
|
},
|
||||||
[4] = { // GGML_TYPE_Q4_2
|
[4] = { // GGML_TYPE_Q4_2
|
||||||
.type_name = "DEPRECATED",
|
.type_name = "DEPRECATED",
|
||||||
|
@ -641,6 +715,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref,
|
||||||
|
.byteswap = ggml_byteswap_q5_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q5_1] = {
|
[GGML_TYPE_Q5_1] = {
|
||||||
.type_name = "q5_1",
|
.type_name = "q5_1",
|
||||||
|
@ -649,6 +724,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
|
||||||
|
.byteswap = ggml_byteswap_q5_1,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q8_0] = {
|
[GGML_TYPE_Q8_0] = {
|
||||||
.type_name = "q8_0",
|
.type_name = "q8_0",
|
||||||
|
@ -657,6 +733,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
|
||||||
|
.byteswap = ggml_byteswap_q8_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q8_1] = {
|
[GGML_TYPE_Q8_1] = {
|
||||||
.type_name = "q8_1",
|
.type_name = "q8_1",
|
||||||
|
@ -664,6 +741,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q8_1),
|
.type_size = sizeof(block_q8_1),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
|
||||||
|
.byteswap = ggml_byteswap_q8_1,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q2_K] = {
|
[GGML_TYPE_Q2_K] = {
|
||||||
.type_name = "q2_K",
|
.type_name = "q2_K",
|
||||||
|
@ -672,6 +750,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref,
|
||||||
|
.byteswap = ggml_byteswap_q2_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q3_K] = {
|
[GGML_TYPE_Q3_K] = {
|
||||||
.type_name = "q3_K",
|
.type_name = "q3_K",
|
||||||
|
@ -680,6 +759,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref,
|
||||||
|
.byteswap = ggml_byteswap_q3_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_K] = {
|
[GGML_TYPE_Q4_K] = {
|
||||||
.type_name = "q4_K",
|
.type_name = "q4_K",
|
||||||
|
@ -688,6 +768,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref,
|
||||||
|
.byteswap = ggml_byteswap_q4_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q5_K] = {
|
[GGML_TYPE_Q5_K] = {
|
||||||
.type_name = "q5_K",
|
.type_name = "q5_K",
|
||||||
|
@ -696,6 +777,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref,
|
||||||
|
.byteswap = ggml_byteswap_q5_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q6_K] = {
|
[GGML_TYPE_Q6_K] = {
|
||||||
.type_name = "q6_K",
|
.type_name = "q6_K",
|
||||||
|
@ -704,6 +786,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
||||||
.from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
|
.from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref,
|
||||||
|
.byteswap = ggml_byteswap_q6_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ2_XXS] = {
|
[GGML_TYPE_IQ2_XXS] = {
|
||||||
.type_name = "iq2_xxs",
|
.type_name = "iq2_xxs",
|
||||||
|
@ -712,6 +795,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
|
.to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
|
||||||
.from_float_ref = NULL,
|
.from_float_ref = NULL,
|
||||||
|
.byteswap = ggml_byteswap_iq2_xxs,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ2_XS] = {
|
[GGML_TYPE_IQ2_XS] = {
|
||||||
.type_name = "iq2_xs",
|
.type_name = "iq2_xs",
|
||||||
|
@ -720,6 +804,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
|
.to_float = (ggml_to_float_t) dequantize_row_iq2_xs,
|
||||||
.from_float_ref = NULL,
|
.from_float_ref = NULL,
|
||||||
|
.byteswap = ggml_byteswap_iq2_xs,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ3_XXS] = {
|
[GGML_TYPE_IQ3_XXS] = {
|
||||||
.type_name = "iq3_xxs",
|
.type_name = "iq3_xxs",
|
||||||
|
@ -728,6 +813,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
|
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
|
||||||
.from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
|
.from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
|
||||||
|
.byteswap = ggml_byteswap_iq3_xxs,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ3_S] = {
|
[GGML_TYPE_IQ3_S] = {
|
||||||
.type_name = "iq3_s",
|
.type_name = "iq3_s",
|
||||||
|
@ -736,6 +822,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq3_s,
|
.to_float = (ggml_to_float_t) dequantize_row_iq3_s,
|
||||||
.from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
|
.from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref,
|
||||||
|
.byteswap = ggml_byteswap_iq3_s,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ2_S] = {
|
[GGML_TYPE_IQ2_S] = {
|
||||||
.type_name = "iq2_s",
|
.type_name = "iq2_s",
|
||||||
|
@ -744,6 +831,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq2_s,
|
.to_float = (ggml_to_float_t) dequantize_row_iq2_s,
|
||||||
.from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
|
.from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref,
|
||||||
|
.byteswap = ggml_byteswap_iq2_s,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ1_S] = {
|
[GGML_TYPE_IQ1_S] = {
|
||||||
.type_name = "iq1_s",
|
.type_name = "iq1_s",
|
||||||
|
@ -752,6 +840,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq1_s,
|
.to_float = (ggml_to_float_t) dequantize_row_iq1_s,
|
||||||
.from_float_ref = NULL,
|
.from_float_ref = NULL,
|
||||||
|
.byteswap = ggml_byteswap_iq1_s,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ1_M] = {
|
[GGML_TYPE_IQ1_M] = {
|
||||||
.type_name = "iq1_m",
|
.type_name = "iq1_m",
|
||||||
|
@ -760,6 +849,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq1_m,
|
.to_float = (ggml_to_float_t) dequantize_row_iq1_m,
|
||||||
.from_float_ref = NULL,
|
.from_float_ref = NULL,
|
||||||
|
.byteswap = ggml_byteswap_iq1_m,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ4_NL] = {
|
[GGML_TYPE_IQ4_NL] = {
|
||||||
.type_name = "iq4_nl",
|
.type_name = "iq4_nl",
|
||||||
|
@ -768,6 +858,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
||||||
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
|
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
|
||||||
|
.byteswap = ggml_byteswap_iq4_nl,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_IQ4_XS] = {
|
[GGML_TYPE_IQ4_XS] = {
|
||||||
.type_name = "iq4_xs",
|
.type_name = "iq4_xs",
|
||||||
|
@ -776,12 +867,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
|
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
|
||||||
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
|
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref,
|
||||||
|
.byteswap = ggml_byteswap_iq4_xs,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q8_K] = {
|
[GGML_TYPE_Q8_K] = {
|
||||||
.type_name = "q8_K",
|
.type_name = "q8_K",
|
||||||
.blck_size = QK_K,
|
.blck_size = QK_K,
|
||||||
.type_size = sizeof(block_q8_K),
|
.type_size = sizeof(block_q8_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
|
.byteswap = ggml_byteswap_q8_k,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_BF16] = {
|
[GGML_TYPE_BF16] = {
|
||||||
.type_name = "bf16",
|
.type_name = "bf16",
|
||||||
|
@ -790,24 +883,28 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
.to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
|
.to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
|
||||||
.from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
|
.from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
|
||||||
|
.byteswap = ggml_byteswap_i16,
|
||||||
},
|
},
|
||||||
[31] = { // GGML_TYPE_Q4_0_4_4
|
[31] = { // GGML_TYPE_Q4_0_4_4
|
||||||
.type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
|
.type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
|
||||||
.blck_size = 0,
|
.blck_size = 0,
|
||||||
.type_size = 0,
|
.type_size = 0,
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_q4_0_4x4,
|
||||||
},
|
},
|
||||||
[32] = { // GGML_TYPE_Q4_0_4_8
|
[32] = { // GGML_TYPE_Q4_0_4_8
|
||||||
.type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
|
.type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
|
||||||
.blck_size = 0,
|
.blck_size = 0,
|
||||||
.type_size = 0,
|
.type_size = 0,
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_q4_0_4x8,
|
||||||
},
|
},
|
||||||
[33] = { // GGML_TYPE_Q4_0_8_8
|
[33] = { // GGML_TYPE_Q4_0_8_8
|
||||||
.type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
|
.type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
|
||||||
.blck_size = 0,
|
.blck_size = 0,
|
||||||
.type_size = 0,
|
.type_size = 0,
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.byteswap = ggml_byteswap_q4_0_8x8,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_TQ1_0] = {
|
[GGML_TYPE_TQ1_0] = {
|
||||||
.type_name = "tq1_0",
|
.type_name = "tq1_0",
|
||||||
|
@ -6499,3 +6596,171 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
|
||||||
if (p0->strict_cpu != p1->strict_cpu ) return false;
|
if (p0->strict_cpu != p1->strict_cpu ) return false;
|
||||||
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
|
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts `elements` consecutive 16-bit values in `buffer` from little-endian to host
// byte order, in place (convert_from_le16 is a no-op on little-endian hosts).
static void ggml_byteswap_i16(void * restrict buffer, size_t elements) {
    uint16_t * cursor = (uint16_t *) buffer;
    for (size_t remaining = elements; remaining > 0; --remaining, ++cursor) {
        convert_from_le16(cursor);
    }
}
|
||||||
|
|
||||||
|
// Converts `elements` consecutive 32-bit values in `buffer` from little-endian to host
// byte order, in place (convert_from_le32 is a no-op on little-endian hosts).
static void ggml_byteswap_i32(void * restrict buffer, size_t elements) {
    uint32_t * cursor = (uint32_t *) buffer;
    for (size_t remaining = elements; remaining > 0; --remaining, ++cursor) {
        convert_from_le32(cursor);
    }
}
|
||||||
|
|
||||||
|
// Converts `elements` consecutive 64-bit values in `buffer` from little-endian to host
// byte order, in place (convert_from_le64 is a no-op on little-endian hosts).
static void ggml_byteswap_i64(void * restrict buffer, size_t elements) {
    uint64_t * cursor = (uint64_t *) buffer;
    for (size_t remaining = elements; remaining > 0; --remaining, ++cursor) {
        convert_from_le64(cursor);
    }
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_0(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_1(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q5_0(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q5_1(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q8_0(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q8_1(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q2_k(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q3_k(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_k(void * restrict buffer, size_t elements) {
|
||||||
|
block_q4_K *data_ptr = (block_q4_K*) buffer;
|
||||||
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
|
convert_from_le16(&(data_ptr[i].d));
|
||||||
|
convert_from_le16(&(data_ptr[i].dmin));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q5_k(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q6_k(void * restrict buffer, size_t elements) {
|
||||||
|
block_q6_K *data_ptr = (block_q6_K*) buffer;
|
||||||
|
for (size_t i = 0; i < elements; ++i) {
|
||||||
|
convert_from_le16(&(data_ptr[i].d));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq2_xxs(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq2_xs(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq3_xxs(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq3_s(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq2_s(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq1_s(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq1_m(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq4_nl(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_iq4_xs(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q8_k(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_0_4x4(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_0_4x8(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) {
|
||||||
|
GGML_ASSERT(false && "byteswap function not implemented yet");
|
||||||
|
UNUSED(buffer);
|
||||||
|
UNUSED(elements);
|
||||||
|
}
|
||||||
|
|
|
@ -15,6 +15,52 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
// le16toh / le32toh / le64toh: convert a little-endian value to host byte order.
#if defined(__gnu_linux__)
#include <endian.h>
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
// No <endian.h>, but the host is big-endian: the bytes really have to be reversed.
// An identity fallback here would silently leave the data in little-endian order.
#define le64toh(x) __builtin_bswap64(x)
#define le32toh(x) __builtin_bswap32(x)
#define le16toh(x) __builtin_bswap16(x)
#else
// Little-endian host without <endian.h>: the value is already in host order.
#define le64toh(x) (x)
#define le32toh(x) (x)
#define le16toh(x) (x)
#endif
|
||||||
|
|
||||||
|
// endianness conversion
//
// convert_from_le(ptr) rewrites *ptr in place from little-endian to host byte order.
// The overload is selected by sizeof(T); only 1, 2, 4 and 8 byte types are covered.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// little-endian host: the call expands to nothing
#define convert_from_le(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <type_traits>

// 1 byte: a single byte has no byte order, nothing to do
template <typename T, std::enable_if_t<sizeof(T) == 1, int> = 0>
static inline void convert_from_le(T *) {}

// 2 bytes
template <typename T, std::enable_if_t<sizeof(T) == 2, int> = 0>
static inline void convert_from_le(T * value) {
    uint16_t raw;
    memcpy(&raw, value, sizeof(raw));
    const uint16_t host = le16toh(raw);
    memcpy(value, &host, sizeof(host));
}

// 4 bytes
template <typename T, std::enable_if_t<sizeof(T) == 4, int> = 0>
static inline void convert_from_le(T * value) {
    uint32_t raw;
    memcpy(&raw, value, sizeof(raw));
    const uint32_t host = le32toh(raw);
    memcpy(value, &host, sizeof(host));
}

// 8 bytes
template <typename T, std::enable_if_t<sizeof(T) == 8, int> = 0>
static inline void convert_from_le(T * value) {
    uint64_t raw;
    memcpy(&raw, value, sizeof(raw));
    const uint64_t host = le64toh(raw);
    memcpy(value, &host, sizeof(host));
}
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct type_to_gguf_type;
|
struct type_to_gguf_type;
|
||||||
|
|
||||||
|
@ -223,7 +269,9 @@ struct gguf_reader {
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool read(T & dst) const {
|
bool read(T & dst) const {
|
||||||
return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
|
auto res = fread(&dst, 1, sizeof(dst), file);
|
||||||
|
convert_from_le(&dst);
|
||||||
|
return res == sizeof(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -1024,6 +1024,14 @@ bool llama_model_loader::load_all_data(
|
||||||
if (ggml_backend_buffer_is_host(cur->buffer)) {
|
if (ggml_backend_buffer_is_host(cur->buffer)) {
|
||||||
file->seek(weight->offs, SEEK_SET);
|
file->seek(weight->offs, SEEK_SET);
|
||||||
file->read_raw(cur->data, n_size);
|
file->read_raw(cur->data, n_size);
|
||||||
|
|
||||||
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||||
|
auto byteswap = ggml_get_type_traits(cur->type)->byteswap;
|
||||||
|
if (byteswap != nullptr) {
|
||||||
|
byteswap(cur->data, ggml_nelements(cur) / ggml_blck_size(cur->type));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (check_tensors) {
|
if (check_tensors) {
|
||||||
validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
|
validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
|
||||||
return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
|
return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
|
||||||
|
@ -1052,6 +1060,14 @@ bool llama_model_loader::load_all_data(
|
||||||
read_buf.resize(n_size);
|
read_buf.resize(n_size);
|
||||||
file->seek(weight->offs, SEEK_SET);
|
file->seek(weight->offs, SEEK_SET);
|
||||||
file->read_raw(read_buf.data(), n_size);
|
file->read_raw(read_buf.data(), n_size);
|
||||||
|
|
||||||
|
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
                    // byteswap() expects a count of type-sized units (blocks), not bytes.
                    // read_buf was resized to n_size raw bytes, so its size() must not be
                    // used for the count; derive it from the tensor's element count, exactly
                    // as the host-buffer path does.
                    auto byteswap = ggml_get_type_traits(cur->type)->byteswap;
                    if (byteswap != nullptr) {
                        byteswap(read_buf.data(), ggml_nelements(cur) / ggml_blck_size(cur->type));
                    }
#endif
|
||||||
|
|
||||||
ggml_backend_tensor_set(cur, read_buf.data(), 0, n_size);
|
ggml_backend_tensor_set(cur, read_buf.data(), 0, n_size);
|
||||||
if (check_tensors && !ggml_validate_row_data(cur->type, read_buf.data(), n_size)) {
|
if (check_tensors && !ggml_validate_row_data(cur->type, read_buf.data(), n_size)) {
|
||||||
throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
|
throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue