sycl: Use syclcompat::dp4a (#10267)
* sycl: Use syclcompat::dp4a
* Using the syclcompat version allows the compiler to optimize the
operation with a native function
* Update news section
* Update CI Windows oneAPI version to 2025.0
* Reword doc
* Call syclcompat::dp4a inside dpct::dp4a
This reverts commit 90cb61d692.
This commit is contained in:
parent
1607a5e5b0
commit
5a54af4d4f
4 changed files with 9 additions and 27 deletions
|
@ -968,8 +968,8 @@ vec_dot_iq3_xxs_q8_1(const void *__restrict__ vbq,
|
|||
grid1[0] ^ signs[0], signs[0], std::minus<>());
|
||||
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
|
||||
grid2[0] ^ signs[1], signs[1], std::minus<>());
|
||||
sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi);
|
||||
sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi);
|
||||
sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi);
|
||||
sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi);
|
||||
q8 += 8;
|
||||
aux32 >>= 7;
|
||||
}
|
||||
|
@ -1009,8 +1009,8 @@ vec_dot_iq3_s_q8_1(const void *__restrict__ vbq,
|
|||
grid1[0] ^ signs0, signs0, std::minus<>());
|
||||
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
|
||||
grid2[0] ^ signs1, signs1, std::minus<>());
|
||||
sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi);
|
||||
sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi);
|
||||
sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi);
|
||||
sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi);
|
||||
q8 += 8;
|
||||
}
|
||||
const float d =
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue