From 9d49a414103d2dbbf3057676b3f79138304bfd63 Mon Sep 17 00:00:00 2001 From: "Meng, Hengyu" Date: Tue, 2 Apr 2024 07:07:46 +0000 Subject: [PATCH] disable iqx on windows as WA --- ggml-common.h | 3 ++ ggml-sycl.cpp | 90 +++++++++++++++++---------------------------------- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/ggml-common.h b/ggml-common.h index b2d67d5db..7bb55ee9f 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -447,6 +447,8 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_ #define GGML_COMMON_IMPL #elif defined(GGML_COMMON_IMPL_SYCL) + +#if defined(__gnu_linux__) // workaround for windows #include #define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory name(sycl::range<1>(size), { @@ -454,6 +456,7 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_ #define GGML_COMMON_IMPL #endif +#endif #if defined(GGML_COMMON_IMPL) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 6fd6ebd3a..779a3ee9d 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -4436,6 +4436,7 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri #endif } +#if defined(__gnu_linux__) template static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy, const sycl::nd_item<3> &item_ct1, @@ -4580,6 +4581,7 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr #endif } +#endif /* DPCT1110:4: The total declared local variable size in device function @@ -8079,7 +8081,7 @@ template static void template static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows, const sycl::nd_item<3> &item_ct1, - const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) { + const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) { const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + item_ct1.get_local_id(1); @@ -9951,6 +9953,7 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int k, } +#if defined(__gnu_linux__) template static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k, dpct::queue_ptr stream) { @@ -10095,6 +10098,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k, }); } } +#endif template static void convert_unary_sycl(const void *__restrict__ vx, @@ -10140,6 +10144,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try { return dequantize_row_q5_K_sycl; case GGML_TYPE_Q6_K: return dequantize_row_q6_K_sycl; +#if defined(__gnu_linux__) case GGML_TYPE_IQ2_XXS: return dequantize_row_iq2_xxs_sycl; case GGML_TYPE_IQ2_XS: @@ -10150,6 +10155,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try { return dequantize_row_iq3_s_sycl; case GGML_TYPE_IQ1_S: return dequantize_row_iq1_s_sycl; +#endif case GGML_TYPE_F32: return convert_unary_sycl; default: @@ -10184,6 +10190,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) { return dequantize_row_q5_K_sycl; case GGML_TYPE_Q6_K: return dequantize_row_q6_K_sycl; +#if defined(__gnu_linux__) case GGML_TYPE_IQ2_XXS: return dequantize_row_iq2_xxs_sycl; case GGML_TYPE_IQ2_XS: @@ -10194,6 +10201,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) { return dequantize_row_iq3_s_sycl; case GGML_TYPE_IQ1_S: return dequantize_row_iq1_s_sycl; +#endif case GGML_TYPE_F16: return convert_unary_sycl; default: @@ -10415,12 +10423,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10428,8 +10432,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10444,12 +10447,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10457,8 +10456,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10473,12 +10471,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10486,8 +10480,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10502,12 +10495,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10515,8 +10504,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10531,12 +10519,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10544,8 +10528,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10560,12 +10543,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10573,8 +10552,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10589,12 +10567,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10602,8 +10576,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10618,12 +10591,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10631,8 +10600,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10647,12 +10615,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10660,8 +10624,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } @@ -10676,12 +10639,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq3xxs_grid.init(*stream); - ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr(); - auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( sycl::nd_range<3>(block_nums * block_dims, block_dims), @@ -10689,13 +10648,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy, [[intel::reqd_sub_group_size(32)]] { mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1, - iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1); + vx, vy, dst, ncols, nrows, item_ct1); }); }); } } +#if defined(__gnu_linux__) static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy, float *dst, const int ncols, const int nrows, @@ -10838,6 +10797,7 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, }); } } +#endif static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, float *dst, const int ncols_x, @@ -13670,6 +13630,7 @@ inline void ggml_sycl_op_mul_mat_vec_q( case GGML_TYPE_Q6_K: mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; +#if defined(__gnu_linux__) case GGML_TYPE_IQ2_XXS: mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; @@ -13685,6 +13646,7 @@ inline void ggml_sycl_op_mul_mat_vec_q( case GGML_TYPE_IQ1_S: mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; +#endif default: GGML_ASSERT(false); break; @@ -17065,6 +17027,12 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons a_type == GGML_TYPE_IQ4_XS) { return false; } +#if defined(__gnu_linux__) + if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS || + a_type == GGML_TYPE_IQ3_XXS || a_type == GGML_TYPE_IQ3_S || + a_type == GGML_TYPE_IQ1_S) { + return false; +#endif return true; } break; case GGML_OP_GET_ROWS: