From b0597b14938a86883ecbdc8a7abcdce6506730fe Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 17 Dec 2024 17:54:04 +0200 Subject: [PATCH 1/3] ggml : fix cpy op for IQ-quants to use reference impl ggml-ci --- ggml/src/ggml-cpu/ggml-cpu.c | 15 +++++++++++---- tests/test-backend-ops.cpp | 7 +++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index 67e67a089..b702f1171 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -2968,8 +2968,12 @@ static void ggml_compute_forward_dup_f16( id += ne00 * (ne01 - ir1); } } - } else if (ggml_get_type_traits_cpu(dst->type)->from_float) { - ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; + } else if (ggml_get_type_traits_cpu(dst->type)->from_float || ggml_get_type_traits(dst->type)->from_float_ref) { + ggml_from_float_t quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; + if (!quantize_row_q) { + quantize_row_q = ggml_get_type_traits(dst->type)->from_float_ref; + } + float * src0_f32 = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; size_t id = 0; @@ -3565,8 +3569,11 @@ static void ggml_compute_forward_dup_f32( id += rs * (ne01 - ir1); } } - } else if (ggml_get_type_traits_cpu(dst->type)->from_float) { - ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; + } else if (ggml_get_type_traits_cpu(dst->type)->from_float || ggml_get_type_traits(dst->type)->from_float_ref) { + ggml_from_float_t quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; + if (!quantize_row_q) { + quantize_row_q = ggml_get_type_traits(dst->type)->from_float_ref; + } size_t id = 0; size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type)); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index b9454ba59..53b6af8b5 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3549,8 +3549,11 @@ static 
std::vector<std::unique_ptr<test_case>> make_test_cases_eval() { for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) { for (ggml_type type_dst : all_types) { - test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4})); - test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows + //if (type_dst == GGML_TYPE_IQ2_S || type_dst == GGML_TYPE_IQ3_XXS || type_dst == GGML_TYPE_IQ3_S) { + // continue; + //} + test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4})); + test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows } } for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) { From 8cc7145cc7aa479fd7d38b921f0ff45bf9bf471c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 17 Dec 2024 18:03:47 +0200 Subject: [PATCH 2/3] ggml : disable tests involving i-matrix quantization --- ggml/src/ggml-cpu/ggml-cpu.c | 15 ++++----------- tests/test-backend-ops.cpp | 6 +++--- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index b702f1171..67e67a089 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -2968,12 +2968,8 @@ static void ggml_compute_forward_dup_f16( id += ne00 * (ne01 - ir1); } } - } else if (ggml_get_type_traits_cpu(dst->type)->from_float || ggml_get_type_traits(dst->type)->from_float_ref) { - ggml_from_float_t quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; - if (!quantize_row_q) { - quantize_row_q = ggml_get_type_traits(dst->type)->from_float_ref; - } - + } else if (ggml_get_type_traits_cpu(dst->type)->from_float) { + ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; float * src0_f32 = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; size_t id = 0; @@ -3569,11 +3565,8 @@ static void ggml_compute_forward_dup_f32( id += rs * (ne01 - ir1); } } - } else if
(ggml_get_type_traits_cpu(dst->type)->from_float || ggml_get_type_traits(dst->type)->from_float_ref) { - ggml_from_float_t quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; - if (!quantize_row_q) { - quantize_row_q = ggml_get_type_traits(dst->type)->from_float_ref; - } + } else if (ggml_get_type_traits_cpu(dst->type)->from_float) { + ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; size_t id = 0; size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type)); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 53b6af8b5..5d8d3b0f5 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3549,9 +3549,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() { for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) { for (ggml_type type_dst : all_types) { - //if (type_dst == GGML_TYPE_IQ2_S || type_dst == GGML_TYPE_IQ3_XXS || type_dst == GGML_TYPE_IQ3_S) { - // continue; - //} + if (type_dst == GGML_TYPE_IQ2_S || type_dst == GGML_TYPE_IQ3_XXS || type_dst == GGML_TYPE_IQ3_S) { + continue; + } test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4})); test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows } From 4fbb801a9d512e2ede0e8e72a53af6a7695429cf Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 17 Dec 2024 18:09:02 +0200 Subject: [PATCH 3/3] ggml : update ggml_backend_cpu_device_supports_op ggml-ci --- ggml/src/ggml-cpu/ggml-cpu.cpp | 3 +++ tests/test-backend-ops.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index c390957af..0b6419f83 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -394,8 +394,11 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st switch (op->op) { case GGML_OP_CPY: return + op->type != GGML_TYPE_IQ3_XXS && + op->type != GGML_TYPE_IQ3_S
&& op->type != GGML_TYPE_IQ2_XXS && op->type != GGML_TYPE_IQ2_XS && + op->type != GGML_TYPE_IQ2_S && op->type != GGML_TYPE_IQ1_S && op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float case GGML_OP_MUL_MAT: diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 5d8d3b0f5..ccdd3fb57 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3549,9 +3549,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() { for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) { for (ggml_type type_dst : all_types) { - if (type_dst == GGML_TYPE_IQ2_S || type_dst == GGML_TYPE_IQ3_XXS || type_dst == GGML_TYPE_IQ3_S) { - continue; - } test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4})); test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows }