disable iqx on windows as WA

This commit is contained in:
Meng, Hengyu 2024-04-02 07:07:46 +00:00
parent f87f7b8986
commit 9d49a41410
2 changed files with 32 additions and 61 deletions

View file

@ -447,6 +447,8 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
#define GGML_COMMON_IMPL #define GGML_COMMON_IMPL
#elif defined(GGML_COMMON_IMPL_SYCL) #elif defined(GGML_COMMON_IMPL_SYCL)
#if defined(__gnu_linux__) // workaround for windows
#include <cstdint> #include <cstdint>
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), { #define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
@ -454,6 +456,7 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
#define GGML_COMMON_IMPL #define GGML_COMMON_IMPL
#endif #endif
#endif
#if defined(GGML_COMMON_IMPL) #if defined(GGML_COMMON_IMPL)

View file

@ -4436,6 +4436,7 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri
#endif #endif
} }
#if defined(__gnu_linux__)
template<typename dst_t> template<typename dst_t>
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy, static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
const sycl::nd_item<3> &item_ct1, const sycl::nd_item<3> &item_ct1,
@ -4580,6 +4581,7 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr
#endif #endif
} }
#endif
/* /*
DPCT1110:4: The total declared local variable size in device function DPCT1110:4: The total declared local variable size in device function
@ -8079,7 +8081,7 @@ template <bool need_check> static void
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl> template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows, static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1, const sycl::nd_item<3> &item_ct1,
const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) { const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) {
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
item_ct1.get_local_id(1); item_ct1.get_local_id(1);
@ -9951,6 +9953,7 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int k,
} }
#if defined(__gnu_linux__)
template <typename dst_t> template <typename dst_t>
static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k, static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) { dpct::queue_ptr stream) {
@ -10095,6 +10098,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
}); });
} }
} }
#endif
template <typename src_t, typename dst_t> template <typename src_t, typename dst_t>
static void convert_unary_sycl(const void *__restrict__ vx, static void convert_unary_sycl(const void *__restrict__ vx,
@ -10140,6 +10144,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
return dequantize_row_q5_K_sycl; return dequantize_row_q5_K_sycl;
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
return dequantize_row_q6_K_sycl; return dequantize_row_q6_K_sycl;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ2_XXS:
return dequantize_row_iq2_xxs_sycl; return dequantize_row_iq2_xxs_sycl;
case GGML_TYPE_IQ2_XS: case GGML_TYPE_IQ2_XS:
@ -10150,6 +10155,7 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
return dequantize_row_iq3_s_sycl; return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S: case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl; return dequantize_row_iq1_s_sycl;
#endif
case GGML_TYPE_F32: case GGML_TYPE_F32:
return convert_unary_sycl<float>; return convert_unary_sycl<float>;
default: default:
@ -10184,6 +10190,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
return dequantize_row_q5_K_sycl; return dequantize_row_q5_K_sycl;
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
return dequantize_row_q6_K_sycl; return dequantize_row_q6_K_sycl;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ2_XXS:
return dequantize_row_iq2_xxs_sycl; return dequantize_row_iq2_xxs_sycl;
case GGML_TYPE_IQ2_XS: case GGML_TYPE_IQ2_XS:
@ -10194,6 +10201,7 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
return dequantize_row_iq3_s_sycl; return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S: case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl; return dequantize_row_iq1_s_sycl;
#endif
case GGML_TYPE_F16: case GGML_TYPE_F16:
return convert_unary_sycl<sycl::half>; return convert_unary_sycl<sycl::half>;
default: default:
@ -10415,12 +10423,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10428,8 +10432,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0, mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>( VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10444,12 +10447,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10457,8 +10456,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1, mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>( VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10473,12 +10471,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10486,8 +10480,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0, mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>( VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10502,12 +10495,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10515,8 +10504,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1, mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>( VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10531,12 +10519,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10544,8 +10528,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0, mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>( VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10560,12 +10543,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10573,8 +10552,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI2_K, block_q2_K, mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>( VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10589,12 +10567,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10602,8 +10576,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI3_K, block_q3_K, mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>( VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10618,12 +10591,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10631,8 +10600,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI4_K, block_q4_K, mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>( VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10647,12 +10615,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10660,8 +10624,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI5_K, block_q5_K, mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>( VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
@ -10676,12 +10639,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{ {
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) { stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for( cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims), sycl::nd_range<3>(block_nums * block_dims, block_dims),
@ -10689,13 +10648,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] { [[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI6_K, block_q6_K, mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>( VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1, vx, vy, dst, ncols, nrows, item_ct1);
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
}); });
}); });
} }
} }
#if defined(__gnu_linux__)
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy, static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols, float *dst, const int ncols,
const int nrows, const int nrows,
@ -10838,6 +10797,7 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
}); });
} }
} }
#endif
static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols_x, float *dst, const int ncols_x,
@ -13670,6 +13630,7 @@ inline void ggml_sycl_op_mul_mat_vec_q(
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break; break;
#if defined(__gnu_linux__)
case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ2_XXS:
mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break; break;
@ -13685,6 +13646,7 @@ inline void ggml_sycl_op_mul_mat_vec_q(
case GGML_TYPE_IQ1_S: case GGML_TYPE_IQ1_S:
mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break; break;
#endif
default: default:
GGML_ASSERT(false); GGML_ASSERT(false);
break; break;
@ -17065,6 +17027,12 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
a_type == GGML_TYPE_IQ4_XS) { a_type == GGML_TYPE_IQ4_XS) {
return false; return false;
} }
#if defined(__gnu_linux__)
if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS ||
a_type == GGML_TYPE_IQ3_XXS || a_type == GGML_TYPE_IQ3_S ||
a_type == GGML_TYPE_IQ1_S) {
return false;
#endif
return true; return true;
} break; } break;
case GGML_OP_GET_ROWS: case GGML_OP_GET_ROWS: