diff
This commit is contained in:
parent
3a916678e3
commit
f1558ab38f
2 changed files with 210 additions and 189 deletions
|
@ -718,7 +718,7 @@ add_library(llama
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(llama PUBLIC .)
|
target_include_directories(llama PUBLIC .)
|
||||||
target_compile_features(llama PUBLIC cxx_std_11) # don't bump
|
target_compile_features(llama PUBLIC cxx_std_20) # raised from cxx_std_11: ggml.cpp uses designated initializers (standardized in C++20)
|
||||||
target_link_libraries(llama PRIVATE
|
target_link_libraries(llama PRIVATE
|
||||||
ggml
|
ggml
|
||||||
${LLAMA_EXTRA_LIBS}
|
${LLAMA_EXTRA_LIBS}
|
||||||
|
|
397
ggml.cpp
397
ggml.cpp
|
@ -422,193 +422,8 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
|
||||||
static void ggml_vec_dot_f32(const int n, float * __restrict__ s, const float * __restrict__ x, const float * __restrict__ y);
|
static void ggml_vec_dot_f32(const int n, float * __restrict__ s, const float * __restrict__ x, const float * __restrict__ y);
|
||||||
static void ggml_vec_dot_f16(const int n, float * __restrict__ s, ggml_fp16_t * __restrict__ x, ggml_fp16_t * __restrict__ y);
|
static void ggml_vec_dot_f16(const int n, float * __restrict__ s, ggml_fp16_t * __restrict__ x, ggml_fp16_t * __restrict__ y);
|
||||||
|
|
||||||
static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
static ggml_type_traits_t type_traits[GGML_TYPE_COUNT];
|
||||||
// [GGML_TYPE_I8] = {
|
|
||||||
// .type_name = "i8",
|
|
||||||
// .blck_size = 1,
|
|
||||||
// .type_size = sizeof(int8_t),
|
|
||||||
// .is_quantized = false,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_I16] = {
|
|
||||||
// .type_name = "i16",
|
|
||||||
// .blck_size = 1,
|
|
||||||
// .type_size = sizeof(int16_t),
|
|
||||||
// .is_quantized = false,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_I32] = {
|
|
||||||
// .type_name = "i32",
|
|
||||||
// .blck_size = 1,
|
|
||||||
// .type_size = sizeof(int32_t),
|
|
||||||
// .is_quantized = false,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_F32] = {
|
|
||||||
// .type_name = "f32",
|
|
||||||
// .blck_size = 1,
|
|
||||||
// .type_size = sizeof(float),
|
|
||||||
// .is_quantized = false,
|
|
||||||
// .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
|
|
||||||
// .vec_dot_type = GGML_TYPE_F32,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_F16] = {
|
|
||||||
// .type_name = "f16",
|
|
||||||
// .blck_size = 1,
|
|
||||||
// .type_size = sizeof(ggml_fp16_t),
|
|
||||||
// .is_quantized = false,
|
|
||||||
// .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
|
|
||||||
// .from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
|
||||||
// .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
|
|
||||||
// .vec_dot_type = GGML_TYPE_F16,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q4_0] = {
|
|
||||||
// .type_name = "q4_0",
|
|
||||||
// .blck_size = QK4_0,
|
|
||||||
// .type_size = sizeof(block_q4_0),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
|
||||||
// .from_float = quantize_row_q4_0,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q4_0_q8_0,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_0,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q4_1] = {
|
|
||||||
// .type_name = "q4_1",
|
|
||||||
// .blck_size = QK4_1,
|
|
||||||
// .type_size = sizeof(block_q4_1),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
|
||||||
// .from_float = quantize_row_q4_1,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q4_1_q8_1,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_1,
|
|
||||||
// },
|
|
||||||
// [4] = { // GGML_TYPE_Q4_2
|
|
||||||
// .type_name = "DEPRECATED",
|
|
||||||
// .blck_size = 0,
|
|
||||||
// .type_size = 0,
|
|
||||||
// .is_quantized = false,
|
|
||||||
// .to_float = NULL,
|
|
||||||
// .from_float = NULL,
|
|
||||||
// .from_float_reference = NULL,
|
|
||||||
// .vec_dot = NULL,
|
|
||||||
// .vec_dot_type = GGML_TYPE_COUNT,
|
|
||||||
// },
|
|
||||||
// [5] = { // GGML_TYPE_Q4_3
|
|
||||||
// .type_name = "DEPRECATED",
|
|
||||||
// .blck_size = 0,
|
|
||||||
// .type_size = 0,
|
|
||||||
// .is_quantized = false,
|
|
||||||
// .to_float = NULL,
|
|
||||||
// .from_float = NULL,
|
|
||||||
// .from_float_reference = NULL,
|
|
||||||
// .vec_dot = NULL,
|
|
||||||
// .vec_dot_type = GGML_TYPE_COUNT,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q5_0] = {
|
|
||||||
// .type_name = "q5_0",
|
|
||||||
// .blck_size = QK5_0,
|
|
||||||
// .type_size = sizeof(block_q5_0),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
|
||||||
// .from_float = quantize_row_q5_0,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q5_0_q8_0,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_0,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q5_1] = {
|
|
||||||
// .type_name = "q5_1",
|
|
||||||
// .blck_size = QK5_1,
|
|
||||||
// .type_size = sizeof(block_q5_1),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
|
||||||
// .from_float = quantize_row_q5_1,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q5_1_q8_1,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_1,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q8_0] = {
|
|
||||||
// .type_name = "q8_0",
|
|
||||||
// .blck_size = QK8_0,
|
|
||||||
// .type_size = sizeof(block_q8_0),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
|
||||||
// .from_float = quantize_row_q8_0,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q8_0_q8_0,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_0,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q8_1] = {
|
|
||||||
// .type_name = "q8_1",
|
|
||||||
// .blck_size = QK8_1,
|
|
||||||
// .type_size = sizeof(block_q8_1),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .from_float = quantize_row_q8_1,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_1,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q2_K] = {
|
|
||||||
// .type_name = "q2_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q2_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
|
||||||
// .from_float = quantize_row_q2_K,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q2_K_q8_K,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_K,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q3_K] = {
|
|
||||||
// .type_name = "q3_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q3_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
|
||||||
// .from_float = quantize_row_q3_K,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q3_K_q8_K,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_K,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q4_K] = {
|
|
||||||
// .type_name = "q4_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q4_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
|
||||||
// .from_float = quantize_row_q4_K,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q4_K_q8_K,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_K,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q5_K] = {
|
|
||||||
// .type_name = "q5_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q5_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
|
||||||
// .from_float = quantize_row_q5_K,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q5_K_q8_K,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_K,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q6_K] = {
|
|
||||||
// .type_name = "q6_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q6_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
|
||||||
// .from_float = quantize_row_q6_K,
|
|
||||||
// .from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
|
|
||||||
// .vec_dot = ggml_vec_dot_q6_K_q8_K,
|
|
||||||
// .vec_dot_type = GGML_TYPE_Q8_K,
|
|
||||||
// },
|
|
||||||
// [GGML_TYPE_Q8_K] = {
|
|
||||||
// .type_name = "q8_K",
|
|
||||||
// .blck_size = QK_K,
|
|
||||||
// .type_size = sizeof(block_q8_K),
|
|
||||||
// .is_quantized = true,
|
|
||||||
// .from_float = quantize_row_q8_K,
|
|
||||||
// }
|
|
||||||
};
|
|
||||||
|
|
||||||
// For internal test use
|
// For internal test use
|
||||||
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
||||||
|
@ -2210,6 +2025,212 @@ static inline int ggml_up(int n, int m) {
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_I8] = {
|
||||||
|
.type_name = "i8",
|
||||||
|
.blck_size = 1,
|
||||||
|
.type_size = sizeof(int8_t),
|
||||||
|
.is_quantized = false,
|
||||||
|
//.from_float =
|
||||||
|
};
|
||||||
|
type_traits[GGML_TYPE_I16] = {
|
||||||
|
.type_name = "i16",
|
||||||
|
.blck_size = 1,
|
||||||
|
.type_size = sizeof(int16_t),
|
||||||
|
.is_quantized = false,
|
||||||
|
};
|
||||||
|
type_traits[GGML_TYPE_I32] = {
|
||||||
|
.type_name = "i32",
|
||||||
|
.blck_size = 1,
|
||||||
|
.type_size = sizeof(int32_t),
|
||||||
|
.is_quantized = false,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_F32] = {
|
||||||
|
.type_name = "f32",
|
||||||
|
.blck_size = 1,
|
||||||
|
.type_size = sizeof(float),
|
||||||
|
.is_quantized = false,
|
||||||
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
|
||||||
|
.vec_dot_type = GGML_TYPE_F32,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_F16] = {
|
||||||
|
.type_name = "f16",
|
||||||
|
.blck_size = 1,
|
||||||
|
.type_size = sizeof(ggml_fp16_t),
|
||||||
|
.is_quantized = false,
|
||||||
|
.to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row,
|
||||||
|
.from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
||||||
|
.from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row,
|
||||||
|
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
|
||||||
|
.vec_dot_type = GGML_TYPE_F16,
|
||||||
|
};
|
||||||
|
type_traits[GGML_TYPE_Q4_0] = {
|
||||||
|
.type_name = "q4_0",
|
||||||
|
.blck_size = QK4_0,
|
||||||
|
.type_size = sizeof(block_q4_0),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
||||||
|
.from_float = quantize_row_q4_0,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q4_0_q8_0,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q4_1] = {
|
||||||
|
.type_name = "q4_1",
|
||||||
|
.blck_size = QK4_1,
|
||||||
|
.type_size = sizeof(block_q4_1),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
||||||
|
.from_float = quantize_row_q4_1,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q4_1_q8_1,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[4] = { //GGML_TYPE_Q4_2
|
||||||
|
.type_name = "DEPRECATED",
|
||||||
|
.blck_size = 0,
|
||||||
|
.type_size = 0,
|
||||||
|
.is_quantized = false,
|
||||||
|
.to_float = NULL,
|
||||||
|
.from_float = NULL,
|
||||||
|
.from_float_reference = NULL,
|
||||||
|
.vec_dot = NULL,
|
||||||
|
.vec_dot_type = GGML_TYPE_COUNT,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[5] = { // GGML_TYPE_Q4_3
|
||||||
|
.type_name = "DEPRECATED",
|
||||||
|
.blck_size = 0,
|
||||||
|
.type_size = 0,
|
||||||
|
.is_quantized = false,
|
||||||
|
.to_float = NULL,
|
||||||
|
.from_float = NULL,
|
||||||
|
.from_float_reference = NULL,
|
||||||
|
.vec_dot = NULL,
|
||||||
|
.vec_dot_type = GGML_TYPE_COUNT,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q5_0] = {
|
||||||
|
.type_name = "q5_0",
|
||||||
|
.blck_size = QK5_0,
|
||||||
|
.type_size = sizeof(block_q5_0),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
||||||
|
.from_float = quantize_row_q5_0,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q5_0_q8_0,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q5_1] = {
|
||||||
|
.type_name = "q5_1",
|
||||||
|
.blck_size = QK5_1,
|
||||||
|
.type_size = sizeof(block_q5_1),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
||||||
|
.from_float = quantize_row_q5_1,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q5_1_q8_1,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q8_0] = {
|
||||||
|
.type_name = "q8_0",
|
||||||
|
.blck_size = QK8_0,
|
||||||
|
.type_size = sizeof(block_q8_0),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
||||||
|
.from_float = quantize_row_q8_0,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q8_1] = {
|
||||||
|
.type_name = "q8_1",
|
||||||
|
.blck_size = QK8_1,
|
||||||
|
.type_size = sizeof(block_q8_1),
|
||||||
|
.is_quantized = true,
|
||||||
|
.from_float = quantize_row_q8_1,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q2_K] = {
|
||||||
|
.type_name = "q2_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q2_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
||||||
|
.from_float = quantize_row_q2_K,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q2_K_q8_K,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q3_K] = {
|
||||||
|
.type_name = "q3_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q3_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
||||||
|
.from_float = quantize_row_q3_K,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q3_K_q8_K,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q4_K] = {
|
||||||
|
.type_name = "q4_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q4_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
||||||
|
.from_float = quantize_row_q4_K,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q5_K] = {
|
||||||
|
.type_name = "q5_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q5_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
||||||
|
.from_float = quantize_row_q5_K,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q5_K_q8_K,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q6_K] = {
|
||||||
|
.type_name = "q6_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q6_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
||||||
|
.from_float = quantize_row_q6_K,
|
||||||
|
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
|
||||||
|
.vec_dot = ggml_vec_dot_q6_K_q8_K,
|
||||||
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
};
|
||||||
|
|
||||||
|
type_traits[GGML_TYPE_Q8_K] = {
|
||||||
|
.type_name = "q8_K",
|
||||||
|
.blck_size = QK_K,
|
||||||
|
.type_size = sizeof(block_q8_K),
|
||||||
|
.is_quantized = true,
|
||||||
|
.from_float = quantize_row_q8_K,
|
||||||
|
//.to_float = dequantize_row_q8_K, // TODO/FIXME: to_float is left unset for q8_K
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct ggml_context * ctx = NULL;
|
struct ggml_context * ctx = NULL;
|
||||||
static bool is_first_call = true;
|
static bool is_first_call = true;
|
||||||
// make this function thread safe
|
// make this function thread safe
|
||||||
|
@ -18476,7 +18497,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
||||||
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
||||||
struct gguf_kv * kv = &ctx->kv[i];
|
struct gguf_kv * kv = &ctx->kv[i];
|
||||||
|
|
||||||
fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
// i is a uint64_t loop counter; cast it so the argument type matches the
// %llu conversion on every platform (%ld is wrong where long is 32-bit).
fprintf(stderr, "%s: reading kv %llu\n", __func__, (unsigned long long) i);
|
||||||
|
|
||||||
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
ok = ok && gguf_fread_str(file, &kv->key, &offset);
|
||||||
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue