ggml : add I32 <-> F32 conversion
ggml-ci
This commit is contained in:
parent
fc775366f1
commit
8772658b11
4 changed files with 53 additions and 36 deletions
45
ggml.c
45
ggml.c
|
@ -355,6 +355,18 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert a row of n int32 values read from x into floats written to y.
// Each element is converted with a plain C cast; integers that a float cannot
// hold exactly are therefore rounded by that cast. A non-positive n writes nothing.
static void ggml_i32_to_f32_row(const int32_t * x, float * y, int n) {
    const int32_t * src = x;
    float         * dst = y;

    for (int remaining = n; remaining > 0; remaining--) {
        *dst++ = (float) *src++;
    }
}
|
||||||
|
|
||||||
|
// Convert a row of n floats read from x into int32 values written to y.
//
// In-range values are truncated toward zero, exactly as a plain C cast does.
// A plain cast is undefined behaviour when the float is NaN, infinite, or lies
// outside the int32 range, so those inputs are handled explicitly:
//   - NaN                     -> 0
//   - value >=  2^31 (or +inf) -> INT32_MAX
//   - value <  -2^31 (or -inf) -> INT32_MIN
// A non-positive n writes nothing.
static void ggml_f32_to_i32_row(const float * x, int32_t * y, int n) {
    // 2^31 is exactly representable as a float; INT32_MAX itself is not,
    // so the upper bound is tested with >= against 2^31.
    const float upper = 2147483648.0f;
    const float lower = -2147483648.0f; // converts exactly to INT32_MIN

    for (int i = 0; i < n; i++) {
        const float v = x[i];

        if (v != v) {
            // only NaN compares unequal to itself
            y[i] = 0;
        } else if (v >= upper) {
            y[i] = INT32_MAX;
        } else if (v < lower) {
            y[i] = INT32_MIN;
        } else {
            y[i] = (int32_t) v;
        }
    }
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// timing
|
// timing
|
||||||
//
|
//
|
||||||
|
@ -454,6 +466,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.blck_size = 1,
|
.blck_size = 1,
|
||||||
.type_size = sizeof(int32_t),
|
.type_size = sizeof(int32_t),
|
||||||
.is_quantized = false,
|
.is_quantized = false,
|
||||||
|
.to_float = (ggml_to_float_t) ggml_i32_to_f32_row,
|
||||||
|
.from_float = (ggml_from_float_t) ggml_f32_to_i32_row,
|
||||||
|
.from_float_reference = (ggml_from_float_t) ggml_f32_to_i32_row,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_F32] = {
|
[GGML_TYPE_F32] = {
|
||||||
.type_name = "f32",
|
.type_name = "f32",
|
||||||
|
@ -482,7 +497,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q4_0),
|
.type_size = sizeof(block_q4_0),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_0,
|
||||||
.from_float = quantize_row_q4_0,
|
.from_float = (ggml_from_float_t) quantize_row_q4_0,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference,
|
||||||
.vec_dot = ggml_vec_dot_q4_0_q8_0,
|
.vec_dot = ggml_vec_dot_q4_0_q8_0,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_0,
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
@ -498,7 +513,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q4_1),
|
.type_size = sizeof(block_q4_1),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_1,
|
||||||
.from_float = quantize_row_q4_1,
|
.from_float = (ggml_from_float_t) quantize_row_q4_1,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference,
|
||||||
.vec_dot = ggml_vec_dot_q4_1_q8_1,
|
.vec_dot = ggml_vec_dot_q4_1_q8_1,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_1,
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
|
@ -538,7 +553,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q5_0),
|
.type_size = sizeof(block_q5_0),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_0,
|
||||||
.from_float = quantize_row_q5_0,
|
.from_float = (ggml_from_float_t) quantize_row_q5_0,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference,
|
||||||
.vec_dot = ggml_vec_dot_q5_0_q8_0,
|
.vec_dot = ggml_vec_dot_q5_0_q8_0,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_0,
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
@ -550,7 +565,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q5_1),
|
.type_size = sizeof(block_q5_1),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_1,
|
||||||
.from_float = quantize_row_q5_1,
|
.from_float = (ggml_from_float_t) quantize_row_q5_1,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference,
|
||||||
.vec_dot = ggml_vec_dot_q5_1_q8_1,
|
.vec_dot = ggml_vec_dot_q5_1_q8_1,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_1,
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
|
@ -562,7 +577,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q8_0),
|
.type_size = sizeof(block_q8_0),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
.to_float = (ggml_to_float_t) dequantize_row_q8_0,
|
||||||
.from_float = quantize_row_q8_0,
|
.from_float = (ggml_from_float_t) quantize_row_q8_0,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference,
|
||||||
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
.vec_dot = ggml_vec_dot_q8_0_q8_0,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_0,
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
|
@ -577,7 +592,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.blck_size = QK8_1,
|
.blck_size = QK8_1,
|
||||||
.type_size = sizeof(block_q8_1),
|
.type_size = sizeof(block_q8_1),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.from_float = quantize_row_q8_1,
|
.from_float = (ggml_from_float_t) quantize_row_q8_1,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_1,
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
||||||
.nrows = 1,
|
.nrows = 1,
|
||||||
|
@ -588,7 +603,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q2_K),
|
.type_size = sizeof(block_q2_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q2_K,
|
||||||
.from_float = quantize_row_q2_K,
|
.from_float = (ggml_from_float_t) quantize_row_q2_K,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference,
|
||||||
.vec_dot = ggml_vec_dot_q2_K_q8_K,
|
.vec_dot = ggml_vec_dot_q2_K_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
@ -600,7 +615,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q3_K),
|
.type_size = sizeof(block_q3_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q3_K,
|
||||||
.from_float = quantize_row_q3_K,
|
.from_float = (ggml_from_float_t) quantize_row_q3_K,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference,
|
||||||
.vec_dot = ggml_vec_dot_q3_K_q8_K,
|
.vec_dot = ggml_vec_dot_q3_K_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
@ -612,7 +627,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q4_K),
|
.type_size = sizeof(block_q4_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q4_K,
|
||||||
.from_float = quantize_row_q4_K,
|
.from_float = (ggml_from_float_t) quantize_row_q4_K,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference,
|
||||||
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
.vec_dot = ggml_vec_dot_q4_K_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
@ -624,7 +639,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q5_K),
|
.type_size = sizeof(block_q5_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q5_K,
|
||||||
.from_float = quantize_row_q5_K,
|
.from_float = (ggml_from_float_t) quantize_row_q5_K,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference,
|
||||||
.vec_dot = ggml_vec_dot_q5_K_q8_K,
|
.vec_dot = ggml_vec_dot_q5_K_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
@ -636,7 +651,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_q6_K),
|
.type_size = sizeof(block_q6_K),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
.to_float = (ggml_to_float_t) dequantize_row_q6_K,
|
||||||
.from_float = quantize_row_q6_K,
|
.from_float = (ggml_from_float_t) quantize_row_q6_K,
|
||||||
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference,
|
||||||
.vec_dot = ggml_vec_dot_q6_K_q8_K,
|
.vec_dot = ggml_vec_dot_q6_K_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
|
@ -672,8 +687,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_iq3_xxs),
|
.type_size = sizeof(block_iq3_xxs),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
|
.to_float = (ggml_to_float_t) dequantize_row_iq3_xxs,
|
||||||
.from_float = quantize_row_iq3_xxs,
|
.from_float = (ggml_from_float_t) quantize_row_iq3_xxs,
|
||||||
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_iq3_xxs_reference,
|
||||||
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
|
.vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_K,
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
||||||
.nrows = 1,
|
.nrows = 1,
|
||||||
|
@ -696,8 +711,8 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||||
.type_size = sizeof(block_iq4_nl),
|
.type_size = sizeof(block_iq4_nl),
|
||||||
.is_quantized = true,
|
.is_quantized = true,
|
||||||
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
.to_float = (ggml_to_float_t) dequantize_row_iq4_nl,
|
||||||
.from_float = quantize_row_iq4_nl,
|
.from_float = (ggml_from_float_t) quantize_row_iq4_nl,
|
||||||
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_nl_reference,
|
.from_float_reference = (ggml_from_float_t) quantize_row_iq4_nl_reference,
|
||||||
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
|
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
|
||||||
.vec_dot_type = GGML_TYPE_Q8_0,
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
||||||
.nrows = 1,
|
.nrows = 1,
|
||||||
|
|
|
@ -5928,6 +5928,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
// get input vectors with right size
|
// get input vectors with right size
|
||||||
const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
|
const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type);
|
||||||
|
|
||||||
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
|
struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0);
|
||||||
struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
|
struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0);
|
||||||
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
|
struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0);
|
||||||
|
@ -5938,8 +5939,9 @@ struct llm_build_context {
|
||||||
// token types are hardcoded to zero ("Sentence A")
|
// token types are hardcoded to zero ("Sentence A")
|
||||||
struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
|
struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
|
||||||
inpL = ggml_add(ctx0, inpL, type_row0);
|
inpL = ggml_add(ctx0, inpL, type_row0);
|
||||||
|
|
||||||
if (model.arch == LLM_ARCH_BERT) {
|
if (model.arch == LLM_ARCH_BERT) {
|
||||||
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
|
inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, ggml_cast(ctx0, inp_pos, GGML_TYPE_I32)), inpL);
|
||||||
}
|
}
|
||||||
cb(inpL, "inp_embd", -1);
|
cb(inpL, "inp_embd", -1);
|
||||||
|
|
||||||
|
|
|
@ -143,10 +143,10 @@ int main(int argc, char * argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
|
||||||
printf("Testing %s\n", ggml_type_name((ggml_type) i));
|
printf("Testing %s\n", ggml_type_name((ggml_type) i));
|
||||||
ggml_quantize_init(ei);
|
ggml_quantize_init(ei);
|
||||||
|
|
||||||
if (qfns.from_float && qfns.to_float) {
|
|
||||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||||
const float max_quantization_error =
|
const float max_quantization_error =
|
||||||
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
|
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
|
||||||
|
|
|
@ -275,7 +275,7 @@ int main(int argc, char * argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (qfns.from_float && qfns.to_float) {
|
if (qfns.from_float && qfns.to_float && qfns.vec_dot) {
|
||||||
printf("%s\n", ggml_type_name(type));
|
printf("%s\n", ggml_type_name(type));
|
||||||
|
|
||||||
ggml_quantize_init(type);
|
ggml_quantize_init(type);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue