array instead of global_memory
This commit is contained in:
parent
d2ecac551d
commit
d100b7511c
2 changed files with 29 additions and 76 deletions
|
@ -448,15 +448,13 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
|
||||||
#define GGML_COMMON_IMPL
|
#define GGML_COMMON_IMPL
|
||||||
#elif defined(GGML_COMMON_IMPL_SYCL)
|
#elif defined(GGML_COMMON_IMPL_SYCL)
|
||||||
|
|
||||||
#if defined(__gnu_linux__) // workaround for windows
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
|
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
||||||
#define GGML_TABLE_END() });
|
#define GGML_TABLE_END() };
|
||||||
|
|
||||||
#define GGML_COMMON_IMPL
|
#define GGML_COMMON_IMPL
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(GGML_COMMON_IMPL)
|
#if defined(GGML_COMMON_IMPL)
|
||||||
|
|
||||||
|
|
|
@ -4436,7 +4436,6 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
template<typename dst_t>
|
template<typename dst_t>
|
||||||
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
||||||
const sycl::nd_item<3> &item_ct1,
|
const sycl::nd_item<3> &item_ct1,
|
||||||
|
@ -4581,7 +4580,6 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
DPCT1110:4: The total declared local variable size in device function
|
DPCT1110:4: The total declared local variable size in device function
|
||||||
|
@ -9953,23 +9951,19 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int k,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
template <typename dst_t>
|
template <typename dst_t>
|
||||||
static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
|
static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
|
||||||
dpct::queue_ptr stream) {
|
dpct::queue_ptr stream) {
|
||||||
const int nb = k / QK_K;
|
const int nb = k / QK_K;
|
||||||
{
|
{
|
||||||
iq2xxs_grid.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
dpct::has_capability_or_fail(stream->get_device(),
|
dpct::has_capability_or_fail(stream->get_device(),
|
||||||
{sycl::aspect::fp16});
|
{sycl::aspect::fp16});
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
|
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||||
sycl::range<3>(1, 1, 32),
|
sycl::range<3>(1, 1, 32),
|
||||||
|
@ -9988,17 +9982,14 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int k,
|
||||||
dpct::queue_ptr stream) {
|
dpct::queue_ptr stream) {
|
||||||
const int nb = k / QK_K;
|
const int nb = k / QK_K;
|
||||||
{
|
{
|
||||||
iq2xs_grid.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
dpct::has_capability_or_fail(stream->get_device(),
|
dpct::has_capability_or_fail(stream->get_device(),
|
||||||
{sycl::aspect::fp16});
|
{sycl::aspect::fp16});
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
|
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||||
sycl::range<3>(1, 1, 32),
|
sycl::range<3>(1, 1, 32),
|
||||||
|
@ -10017,17 +10008,14 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k,
|
||||||
dpct::queue_ptr stream) {
|
dpct::queue_ptr stream) {
|
||||||
const int nb = k / QK_K;
|
const int nb = k / QK_K;
|
||||||
{
|
{
|
||||||
iq3xxs_grid.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
dpct::has_capability_or_fail(stream->get_device(),
|
dpct::has_capability_or_fail(stream->get_device(),
|
||||||
{sycl::aspect::fp16});
|
{sycl::aspect::fp16});
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||||
sycl::range<3>(1, 1, 32),
|
sycl::range<3>(1, 1, 32),
|
||||||
|
@ -10046,17 +10034,14 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k,
|
||||||
dpct::queue_ptr stream) {
|
dpct::queue_ptr stream) {
|
||||||
const int nb = k / QK_K;
|
const int nb = k / QK_K;
|
||||||
{
|
{
|
||||||
iq3s_grid.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
dpct::has_capability_or_fail(stream->get_device(),
|
dpct::has_capability_or_fail(stream->get_device(),
|
||||||
{sycl::aspect::fp16});
|
{sycl::aspect::fp16});
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
|
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||||
sycl::range<3>(1, 1, 32),
|
sycl::range<3>(1, 1, 32),
|
||||||
|
@ -10075,17 +10060,14 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
||||||
dpct::queue_ptr stream) {
|
dpct::queue_ptr stream) {
|
||||||
const int nb = k / QK_K;
|
const int nb = k / QK_K;
|
||||||
{
|
{
|
||||||
iq1s_grid_gpu.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
dpct::has_capability_or_fail(stream->get_device(),
|
dpct::has_capability_or_fail(stream->get_device(),
|
||||||
{sycl::aspect::fp16});
|
{sycl::aspect::fp16});
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
|
||||||
sycl::range<3>(1, 1, 32),
|
sycl::range<3>(1, 1, 32),
|
||||||
|
@ -10098,7 +10080,6 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
template <typename src_t, typename dst_t>
|
template <typename src_t, typename dst_t>
|
||||||
static void convert_unary_sycl(const void *__restrict__ vx,
|
static void convert_unary_sycl(const void *__restrict__ vx,
|
||||||
|
@ -10144,7 +10125,6 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
|
||||||
return dequantize_row_q5_K_sycl;
|
return dequantize_row_q5_K_sycl;
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
return dequantize_row_q6_K_sycl;
|
return dequantize_row_q6_K_sycl;
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
return dequantize_row_iq2_xxs_sycl;
|
return dequantize_row_iq2_xxs_sycl;
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
|
@ -10155,7 +10135,6 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
|
||||||
return dequantize_row_iq3_s_sycl;
|
return dequantize_row_iq3_s_sycl;
|
||||||
case GGML_TYPE_IQ1_S:
|
case GGML_TYPE_IQ1_S:
|
||||||
return dequantize_row_iq1_s_sycl;
|
return dequantize_row_iq1_s_sycl;
|
||||||
#endif
|
|
||||||
case GGML_TYPE_F32:
|
case GGML_TYPE_F32:
|
||||||
return convert_unary_sycl<float>;
|
return convert_unary_sycl<float>;
|
||||||
default:
|
default:
|
||||||
|
@ -10190,7 +10169,6 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
|
||||||
return dequantize_row_q5_K_sycl;
|
return dequantize_row_q5_K_sycl;
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
return dequantize_row_q6_K_sycl;
|
return dequantize_row_q6_K_sycl;
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
return dequantize_row_iq2_xxs_sycl;
|
return dequantize_row_iq2_xxs_sycl;
|
||||||
case GGML_TYPE_IQ2_XS:
|
case GGML_TYPE_IQ2_XS:
|
||||||
|
@ -10201,7 +10179,6 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
|
||||||
return dequantize_row_iq3_s_sycl;
|
return dequantize_row_iq3_s_sycl;
|
||||||
case GGML_TYPE_IQ1_S:
|
case GGML_TYPE_IQ1_S:
|
||||||
return dequantize_row_iq1_s_sycl;
|
return dequantize_row_iq1_s_sycl;
|
||||||
#endif
|
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
return convert_unary_sycl<sycl::half>;
|
return convert_unary_sycl<sycl::half>;
|
||||||
default:
|
default:
|
||||||
|
@ -10654,7 +10631,7 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||||
float *dst, const int ncols,
|
float *dst, const int ncols,
|
||||||
const int nrows,
|
const int nrows,
|
||||||
|
@ -10664,15 +10641,11 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||||
{
|
{
|
||||||
iq2xxs_grid.init(*stream);
|
|
||||||
ksigns_iq2xs.init(*stream);
|
|
||||||
kmask_iq2xs.init(*stream);
|
|
||||||
|
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
|
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
|
||||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
|
||||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
|
||||||
|
|
||||||
cgh.parallel_for(
|
cgh.parallel_for(
|
||||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||||
|
@ -10695,12 +10668,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
|
||||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||||
{
|
{
|
||||||
iq2xs_grid.init(*stream);
|
|
||||||
ksigns64.init(*stream);
|
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
|
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
|
||||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||||
|
|
||||||
cgh.parallel_for(
|
cgh.parallel_for(
|
||||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||||
|
@ -10723,12 +10694,10 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
|
||||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||||
{
|
{
|
||||||
iq3xxs_grid.init(*stream);
|
|
||||||
ksigns64.init(*stream);
|
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
|
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
|
||||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||||
|
|
||||||
cgh.parallel_for(
|
cgh.parallel_for(
|
||||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||||
|
@ -10751,12 +10720,10 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
|
||||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||||
{
|
{
|
||||||
iq3s_grid.init(*stream);
|
|
||||||
ksigns64.init(*stream);
|
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
|
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
|
||||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||||
|
|
||||||
cgh.parallel_for(
|
cgh.parallel_for(
|
||||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||||
|
@ -10779,12 +10746,10 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
|
||||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||||
{
|
{
|
||||||
iq1s_grid_gpu.init(*stream);
|
|
||||||
ksigns64.init(*stream);
|
|
||||||
|
|
||||||
stream->submit([&](sycl::handler &cgh) {
|
stream->submit([&](sycl::handler &cgh) {
|
||||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
|
||||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
auto ksigns64_ptr_ct1 = &ksigns64[0];
|
||||||
|
|
||||||
cgh.parallel_for(
|
cgh.parallel_for(
|
||||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||||
|
@ -10797,7 +10762,6 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
|
||||||
float *dst, const int ncols_x,
|
float *dst, const int ncols_x,
|
||||||
|
@ -13630,7 +13594,6 @@ inline void ggml_sycl_op_mul_mat_vec_q(
|
||||||
case GGML_TYPE_Q6_K:
|
case GGML_TYPE_Q6_K:
|
||||||
mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
mul_mat_vec_q6_K_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
||||||
break;
|
break;
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
case GGML_TYPE_IQ2_XXS:
|
case GGML_TYPE_IQ2_XXS:
|
||||||
mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
mul_mat_vec_iq2_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
||||||
break;
|
break;
|
||||||
|
@ -13646,7 +13609,6 @@ inline void ggml_sycl_op_mul_mat_vec_q(
|
||||||
case GGML_TYPE_IQ1_S:
|
case GGML_TYPE_IQ1_S:
|
||||||
mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
|
||||||
break;
|
break;
|
||||||
#endif
|
|
||||||
default:
|
default:
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
break;
|
break;
|
||||||
|
@ -17027,13 +16989,6 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
|
||||||
a_type == GGML_TYPE_IQ4_XS) {
|
a_type == GGML_TYPE_IQ4_XS) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#if defined(__gnu_linux__)
|
|
||||||
if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS ||
|
|
||||||
a_type == GGML_TYPE_IQ3_XXS || a_type == GGML_TYPE_IQ3_S ||
|
|
||||||
a_type == GGML_TYPE_IQ1_S) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return true;
|
return true;
|
||||||
} break;
|
} break;
|
||||||
case GGML_OP_GET_ROWS:
|
case GGML_OP_GET_ROWS:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue