disable iqx on windows as WA

This commit is contained in:
Meng, Hengyu 2024-04-02 07:07:46 +00:00
parent f87f7b8986
commit 9d49a41410
2 changed files with 32 additions and 61 deletions

View file

@ -447,6 +447,8 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
#define GGML_COMMON_IMPL
#elif defined(GGML_COMMON_IMPL_SYCL)
#if defined(__gnu_linux__) // workaround for windows
#include <cstdint>
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
@ -454,6 +456,7 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
#define GGML_COMMON_IMPL
#endif
#endif
#if defined(GGML_COMMON_IMPL)

View file

@ -4436,6 +4436,7 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri
#endif
}
#if defined(__gnu_linux__)
template<typename dst_t>
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
const sycl::nd_item<3> &item_ct1,
@ -4580,6 +4581,7 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr
#endif
}
#endif
/*
DPCT1110:4: The total declared local variable size in device function
@ -8079,7 +8081,7 @@ template <bool need_check> static void
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) {
const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) {
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
item_ct1.get_local_id(1);
@ -9951,6 +9953,7 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int k,
}
#if defined(__gnu_linux__)
template <typename dst_t>
static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
@ -10095,6 +10098,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
});
}
}
#endif
template <typename src_t, typename dst_t>
static void convert_unary_sycl(const void *__restrict__ vx,
@ -10140,6 +10144,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
return dequantize_row_q5_K_sycl;
case GGML_TYPE_Q6_K:
return dequantize_row_q6_K_sycl;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS:
return dequantize_row_iq2_xxs_sycl;
case GGML_TYPE_IQ2_XS:
@ -10150,6 +10155,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl;
#endif
case GGML_TYPE_F32:
return convert_unary_sycl<float>;
default:
@ -10184,6 +10190,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
return dequantize_row_q5_K_sycl;
case GGML_TYPE_Q6_K:
return dequantize_row_q6_K_sycl;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS:
return dequantize_row_iq2_xxs_sycl;
case GGML_TYPE_IQ2_XS:
@ -10194,6 +10201,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl;
#endif
case GGML_TYPE_F16:
return convert_unary_sycl<sycl::half>;
default:
@ -10415,12 +10423,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10428,8 +10432,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10444,12 +10447,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10457,8 +10456,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10473,12 +10471,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10486,8 +10480,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10502,12 +10495,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10515,8 +10504,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10531,12 +10519,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10544,8 +10528,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10560,12 +10543,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10573,8 +10552,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10589,12 +10567,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10602,8 +10576,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10618,12 +10591,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10631,8 +10600,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10647,12 +10615,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10660,8 +10624,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@ -10676,12 +10639,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10689,13 +10648,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
}
#if defined(__gnu_linux__)
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols,
const int nrows,
@ -10838,6 +10797,7 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
});
}
}
#endif
static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols_x,
@ -13670,6 +13630,7 @@ inline void ggml_sycl_op_mul_mat_vec_q(
case GGML_TYPE_Q6_K:
mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS:
mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break;
@ -13685,6 +13646,7 @@ inline void ggml_sycl_op_mul_mat_vec_q(
case GGML_TYPE_IQ1_S:
mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break;
#endif
default:
GGML_ASSERT(false);
break;
@ -17065,6 +17027,12 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
a_type == GGML_TYPE_IQ4_XS) {
return false;
}
#if defined(__gnu_linux__)
if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS ||
a_type == GGML_TYPE_IQ3_XXS || a_type == GGML_TYPE_IQ3_S ||
a_type == GGML_TYPE_IQ1_S) {
return false;
#endif
return true;
} break;
case GGML_OP_GET_ROWS: