ggml : add ggml-common.h to deduplicate shared code (#5940)
* ggml : add ggml-common.h to shared code ggml-ci * scripts : update sync scripts * sycl : reuse quantum tables ggml-ci * ggml : minor * ggml : minor * sycl : try to fix build
This commit is contained in:
parent
9674aaf35c
commit
8a3012a4ad
10 changed files with 799 additions and 2536 deletions
384
ggml-sycl.cpp
384
ggml-sycl.cpp
|
@ -3144,6 +3144,8 @@ namespace dpct
|
|||
|
||||
} // COPY from DPCT head files
|
||||
|
||||
#define GGML_COMMON_IMPL_SYCL
|
||||
#include "ggml-common.h"
|
||||
|
||||
static int g_ggml_sycl_debug=0;
|
||||
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) printf(__VA_ARGS__);}while(0)
|
||||
|
@ -4745,388 +4747,6 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri
|
|||
#endif
|
||||
}
|
||||
|
||||
static dpct::global_memory<const uint64_t, 1>
|
||||
iq2xxs_grid(sycl::range<1>(256),
|
||||
{
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919,
|
||||
0x0808080808082b08, 0x0808080808082b2b, 0x0808080808190819,
|
||||
0x0808080808191908, 0x08080808082b0808, 0x08080808082b082b,
|
||||
0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08,
|
||||
0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
||||
0x080808082b08082b, 0x080808082b082b2b, 0x080808082b2b082b,
|
||||
0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908,
|
||||
0x080808192b192b08, 0x0808082b08080808, 0x0808082b0808082b,
|
||||
0x0808082b082b082b, 0x0808082b2b08082b, 0x0808190808080819,
|
||||
0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b,
|
||||
0x0808190819082b08, 0x08081908192b0808, 0x080819082b080819,
|
||||
0x080819082b081908, 0x080819082b190808, 0x080819082b2b1908,
|
||||
0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19,
|
||||
0x080819192b080808, 0x080819192b190819, 0x0808192b08082b19,
|
||||
0x0808192b08190808, 0x0808192b19080808, 0x0808192b2b081908,
|
||||
0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08,
|
||||
0x08082b0819080819, 0x08082b0819081908, 0x08082b0819190808,
|
||||
0x08082b081919082b, 0x08082b082b082b08, 0x08082b1908081908,
|
||||
0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808,
|
||||
0x08190808082b0819, 0x0819080819080808, 0x08190808192b0808,
|
||||
0x081908082b081908, 0x081908082b190808, 0x081908082b191919,
|
||||
0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808,
|
||||
0x0819082b082b1908, 0x0819082b19081919, 0x0819190808080808,
|
||||
0x0819190808082b08, 0x08191908082b0808, 0x08191908082b1919,
|
||||
0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b,
|
||||
0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
||||
0x08192b0819080808, 0x08192b082b080819, 0x08192b1908080808,
|
||||
0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b,
|
||||
0x082b080819081908, 0x082b0808192b0819, 0x082b08082b080808,
|
||||
0x082b08082b08082b, 0x082b0819082b2b19, 0x082b081919082b08,
|
||||
0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808,
|
||||
0x082b19081919192b, 0x082b191908080808, 0x082b191919080819,
|
||||
0x082b1919192b1908, 0x082b192b2b190808, 0x082b2b0808082b08,
|
||||
0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808,
|
||||
0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
||||
0x1908080819080808, 0x1908080819082b08, 0x190808081919192b,
|
||||
0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808,
|
||||
0x19080819192b0819, 0x190808192b080808, 0x190808192b081919,
|
||||
0x1908082b08080819, 0x1908082b08190808, 0x1908082b19082b08,
|
||||
0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808,
|
||||
0x190819082b192b19, 0x190819190819082b, 0x19081919082b1908,
|
||||
0x1908192b08080808, 0x19082b0808080819, 0x19082b0808081908,
|
||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819,
|
||||
0x19082b192b08082b, 0x19082b2b19081919, 0x19082b2b2b190808,
|
||||
0x1919080808080808, 0x1919080808082b08, 0x1919080808190819,
|
||||
0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b,
|
||||
0x191908192b2b1908, 0x1919082b2b190819, 0x191919082b190808,
|
||||
0x191919082b19082b, 0x1919191908082b2b, 0x1919192b08080819,
|
||||
0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808,
|
||||
0x19192b2b08082b08, 0x192b080808081908, 0x192b080808190808,
|
||||
0x192b080819080808, 0x192b0808192b2b08, 0x192b081908080808,
|
||||
0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808,
|
||||
0x192b19190819082b, 0x192b19192b081908, 0x192b2b081908082b,
|
||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808082b2b,
|
||||
0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819,
|
||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808,
|
||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808,
|
||||
0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808,
|
||||
0x2b082b080808082b, 0x2b082b1908081908, 0x2b082b2b08190819,
|
||||
0x2b19080808081908, 0x2b19080808190808, 0x2b190808082b1908,
|
||||
0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808,
|
||||
0x2b191908082b082b, 0x2b19190819081908, 0x2b19191919190819,
|
||||
0x2b192b082b080819, 0x2b192b19082b0808, 0x2b2b08080808082b,
|
||||
0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808,
|
||||
0x2b2b2b1908081908,
|
||||
});
|
||||
|
||||
static dpct::global_memory<const uint64_t, 1>
|
||||
iq2xs_grid(sycl::range<1>(512),
|
||||
{
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919,
|
||||
0x0808080808082b08, 0x0808080808082b2b, 0x0808080808190819,
|
||||
0x0808080808191908, 0x080808080819192b, 0x0808080808192b19,
|
||||
0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908,
|
||||
0x080808081908192b, 0x0808080819082b19, 0x0808080819190808,
|
||||
0x080808081919082b, 0x0808080819191919, 0x0808080819192b08,
|
||||
0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08,
|
||||
0x080808082b190819, 0x080808082b191908, 0x080808082b192b19,
|
||||
0x080808082b2b0808, 0x0808081908080819, 0x0808081908081908,
|
||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08,
|
||||
0x0808081908192b2b, 0x08080819082b0819, 0x08080819082b1908,
|
||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919,
|
||||
0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819,
|
||||
0x080808192b081908, 0x080808192b190808, 0x0808082b08080808,
|
||||
0x0808082b0808082b, 0x0808082b08081919, 0x0808082b08082b08,
|
||||
0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808,
|
||||
0x0808082b19191919, 0x0808082b2b080808, 0x0808082b2b082b2b,
|
||||
0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819,
|
||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b,
|
||||
0x0808190819081919, 0x0808190819082b08, 0x0808190819190819,
|
||||
0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
||||
0x0808191908080808, 0x080819190808082b, 0x0808191908081919,
|
||||
0x0808191908082b08, 0x0808191908190819, 0x0808191908191908,
|
||||
0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808,
|
||||
0x0808192b08080819, 0x0808192b08081908, 0x0808192b08190808,
|
||||
0x0808192b082b192b, 0x0808192b19080808, 0x0808192b1908082b,
|
||||
0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b,
|
||||
0x08082b0808190819, 0x08082b0808191908, 0x08082b08082b0808,
|
||||
0x08082b08082b1919, 0x08082b0819080819, 0x08082b0819081908,
|
||||
0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b1919080808,
|
||||
0x08082b192b080819, 0x08082b192b082b19, 0x08082b2b08080808,
|
||||
0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908,
|
||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808,
|
||||
0x081908080819082b, 0x0819080808191919, 0x0819080808192b08,
|
||||
0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08,
|
||||
0x0819080819190819, 0x0819080819191908, 0x08190808192b0808,
|
||||
0x08190808192b2b2b, 0x081908082b080819, 0x081908082b081908,
|
||||
0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819,
|
||||
0x0819081908191908, 0x08190819082b0808, 0x0819081919080819,
|
||||
0x0819081919081908, 0x0819081919190808, 0x081908192b080808,
|
||||
0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808,
|
||||
0x0819082b19080808, 0x0819082b192b0808, 0x0819190808080808,
|
||||
0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
||||
0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19,
|
||||
0x0819190819190808, 0x08191908192b1908, 0x081919082b080808,
|
||||
0x0819191908080819, 0x0819191908081908, 0x0819191908190808,
|
||||
0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908,
|
||||
0x08192b0808190808, 0x08192b080819082b, 0x08192b0819080808,
|
||||
0x08192b0819191908, 0x08192b082b08192b, 0x08192b1908080808,
|
||||
0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b,
|
||||
0x082b080808081919, 0x082b080808082b08, 0x082b080808082b2b,
|
||||
0x082b080808190819, 0x082b080808191908, 0x082b0808082b0808,
|
||||
0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819,
|
||||
0x082b081908081908, 0x082b081908190808, 0x082b081919080808,
|
||||
0x082b081919082b08, 0x082b0819192b1919, 0x082b082b08080808,
|
||||
0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808,
|
||||
0x082b1908082b2b19, 0x082b190819080808, 0x082b191908080808,
|
||||
0x082b191919080819, 0x082b19191919082b, 0x082b19192b192b19,
|
||||
0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b,
|
||||
0x082b2b08082b0808, 0x082b2b0819191919, 0x082b2b082b082b08,
|
||||
0x082b2b082b2b082b, 0x082b2b19192b2b08, 0x082b2b192b190808,
|
||||
0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19,
|
||||
0x1908080808190808, 0x190808080819082b, 0x1908080808191919,
|
||||
0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919,
|
||||
0x1908080819082b08, 0x1908080819082b2b, 0x1908080819190819,
|
||||
0x1908080819191908, 0x19080808192b0808, 0x19080808192b1919,
|
||||
0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919,
|
||||
0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
||||
0x19080819082b0808, 0x1908081919080819, 0x1908081919081908,
|
||||
0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908,
|
||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b082b2b19,
|
||||
0x1908082b19080808, 0x1908190808080808, 0x190819080808082b,
|
||||
0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808,
|
||||
0x1908190819080819, 0x1908190819081908, 0x1908190819190808,
|
||||
0x190819082b080808, 0x190819082b191908, 0x1908191908080819,
|
||||
0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808,
|
||||
0x1908192b08082b2b, 0x1908192b19081908, 0x1908192b19190808,
|
||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808190808,
|
||||
0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819,
|
||||
0x19082b1919081908, 0x19082b1919190808, 0x19082b19192b2b19,
|
||||
0x19082b2b08081908, 0x1919080808080808, 0x191908080808082b,
|
||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08,
|
||||
0x1919080819080819, 0x1919080819081908, 0x1919080819190808,
|
||||
0x191908082b080808, 0x1919081908080819, 0x1919081908081908,
|
||||
0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908,
|
||||
0x1919082b2b2b2b2b, 0x1919190808080819, 0x1919190808081908,
|
||||
0x1919190808190808, 0x19191908082b0819, 0x1919190819080808,
|
||||
0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808,
|
||||
0x191919192b082b08, 0x1919192b082b0819, 0x1919192b192b2b08,
|
||||
0x1919192b2b2b0819, 0x19192b0808080808, 0x19192b0808191908,
|
||||
0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b,
|
||||
0x19192b2b2b081919, 0x192b080808080819, 0x192b080808081908,
|
||||
0x192b080808190808, 0x192b080819080808, 0x192b080819191908,
|
||||
0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b,
|
||||
0x192b082b2b19082b, 0x192b190808080808, 0x192b19080819192b,
|
||||
0x192b191908190808, 0x192b191919080808, 0x192b191919081919,
|
||||
0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908,
|
||||
0x192b2b2b192b082b, 0x2b08080808080808, 0x2b0808080808082b,
|
||||
0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
||||
0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808,
|
||||
0x2b0808082b080808, 0x2b0808082b08082b, 0x2b0808082b2b2b08,
|
||||
0x2b0808082b2b2b2b, 0x2b08081908080819, 0x2b08081908081908,
|
||||
0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808,
|
||||
0x2b08082b082b0808, 0x2b08082b2b080808, 0x2b08082b2b08082b,
|
||||
0x2b08082b2b2b0808, 0x2b08082b2b2b2b08, 0x2b08190808080819,
|
||||
0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808,
|
||||
0x2b0819082b082b19, 0x2b08191908080808, 0x2b08191919081908,
|
||||
0x2b0819192b2b1919, 0x2b08192b08192b08, 0x2b08192b192b2b2b,
|
||||
0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b,
|
||||
0x2b082b082b2b2b08, 0x2b082b190808192b, 0x2b082b2b082b082b,
|
||||
0x2b082b2b2b080808, 0x2b082b2b2b082b08, 0x2b082b2b2b19192b,
|
||||
0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b,
|
||||
0x2b1908082b081908, 0x2b19081908080808, 0x2b190819082b082b,
|
||||
0x2b190819192b1908, 0x2b19082b1919192b, 0x2b19082b2b082b19,
|
||||
0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19,
|
||||
0x2b1919192b190808, 0x2b1919192b19082b, 0x2b19192b19080819,
|
||||
0x2b192b0819190819, 0x2b192b082b2b192b, 0x2b192b1919082b19,
|
||||
0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b,
|
||||
0x2b2b0808082b0808, 0x2b2b0808082b2b2b, 0x2b2b08082b2b0808,
|
||||
0x2b2b081919190819, 0x2b2b081919192b19, 0x2b2b08192b2b192b,
|
||||
0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808,
|
||||
0x2b2b190819080808, 0x2b2b19082b191919, 0x2b2b192b192b1919,
|
||||
0x2b2b192b2b192b08, 0x2b2b2b0808082b2b, 0x2b2b2b08082b0808,
|
||||
0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908,
|
||||
0x2b2b2b192b08192b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b,
|
||||
0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
||||
});
|
||||
|
||||
static dpct::global_memory<const uint32_t, 1> iq3xxs_grid(
|
||||
sycl::range<1>(256),
|
||||
{
|
||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e,
|
||||
0x04041404, 0x04041414, 0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c,
|
||||
0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14, 0x040c140c, 0x040c142c,
|
||||
0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c,
|
||||
0x04141c1c, 0x04141c3e, 0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c,
|
||||
0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c, 0x041c3e04, 0x04240c1c,
|
||||
0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04,
|
||||
0x043e0c24, 0x043e0c34, 0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c,
|
||||
0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c, 0x0c041c04, 0x0c041c14,
|
||||
0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14,
|
||||
0x0c14140c, 0x0c141c04, 0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404,
|
||||
0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c, 0x0c24042c, 0x0c242c04,
|
||||
0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404,
|
||||
0x14041414, 0x14041434, 0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c,
|
||||
0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c, 0x140c1c04, 0x140c341c,
|
||||
0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c,
|
||||
0x141c0c04, 0x141c0c24, 0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c,
|
||||
0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24, 0x143e040c, 0x143e041c,
|
||||
0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414,
|
||||
0x1c0c1404, 0x1c0c1c0c, 0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c,
|
||||
0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14, 0x1c1c0c0c, 0x1c1c1c1c,
|
||||
0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404,
|
||||
0x24040424, 0x24040c3e, 0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e,
|
||||
0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404, 0x24143404, 0x24143434,
|
||||
0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04,
|
||||
0x2c040c14, 0x2c04240c, 0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434,
|
||||
0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14, 0x2c1c0414, 0x2c1c2c1c,
|
||||
0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434,
|
||||
0x34043424, 0x340c140c, 0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04,
|
||||
0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14, 0x34341c1c, 0x343e041c,
|
||||
0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14,
|
||||
0x3e1c0404, 0x3e1c0c2c, 0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c,
|
||||
0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
||||
});
|
||||
|
||||
static dpct::global_memory<const uint8_t, 1> ksigns_iq2xs(
|
||||
sycl::range<1>(128),
|
||||
{
|
||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12,
|
||||
141, 142, 15, 144, 17, 18, 147, 20, 149, 150, 23, 24, 153,
|
||||
154, 27, 156, 29, 30, 159, 160, 33, 34, 163, 36, 165, 166,
|
||||
39, 40, 169, 170, 43, 172, 45, 46, 175, 48, 177, 178, 51,
|
||||
180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63, 192,
|
||||
65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77,
|
||||
78, 207, 80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90,
|
||||
219, 92, 221, 222, 95, 96, 225, 226, 99, 228, 101, 102, 231,
|
||||
232, 105, 106, 235, 108, 237, 238, 111, 240, 113, 114, 243, 116,
|
||||
245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
||||
});
|
||||
|
||||
static dpct::global_memory<const uint64_t, 1>
|
||||
ksigns64(sycl::range<1>(128),
|
||||
{
|
||||
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00,
|
||||
0x000000000000ffff, 0xff00000000ff0000, 0x0000000000ff00ff,
|
||||
0x0000000000ffff00, 0xff00000000ffffff, 0xff000000ff000000,
|
||||
0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
||||
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00,
|
||||
0x00000000ffffffff, 0xff0000ff00000000, 0x000000ff000000ff,
|
||||
0x000000ff0000ff00, 0xff0000ff0000ffff, 0x000000ff00ff0000,
|
||||
0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
||||
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00,
|
||||
0x000000ffff00ffff, 0xff0000ffffff0000, 0x000000ffffff00ff,
|
||||
0x000000ffffffff00, 0xff0000ffffffffff, 0xff00ff0000000000,
|
||||
0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
||||
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00,
|
||||
0x0000ff0000ffffff, 0x0000ff00ff000000, 0xff00ff00ff0000ff,
|
||||
0xff00ff00ff00ff00, 0x0000ff00ff00ffff, 0xff00ff00ffff0000,
|
||||
0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
||||
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00,
|
||||
0x0000ffff0000ffff, 0xff00ffff00ff0000, 0x0000ffff00ff00ff,
|
||||
0x0000ffff00ffff00, 0xff00ffff00ffffff, 0xff00ffffff000000,
|
||||
0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
||||
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00,
|
||||
0x0000ffffffffffff, 0xffff000000000000, 0x00ff0000000000ff,
|
||||
0x00ff00000000ff00, 0xffff00000000ffff, 0x00ff000000ff0000,
|
||||
0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
||||
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00,
|
||||
0x00ff0000ff00ffff, 0xffff0000ffff0000, 0x00ff0000ffff00ff,
|
||||
0x00ff0000ffffff00, 0xffff0000ffffffff, 0x00ff00ff00000000,
|
||||
0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
||||
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
|
||||
0xffff00ff00ffffff, 0xffff00ffff000000, 0x00ff00ffff0000ff,
|
||||
0x00ff00ffff00ff00, 0xffff00ffff00ffff, 0x00ff00ffffff0000,
|
||||
0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
||||
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00,
|
||||
0x00ffff000000ffff, 0xffffff0000ff0000, 0x00ffff0000ff00ff,
|
||||
0x00ffff0000ffff00, 0xffffff0000ffffff, 0xffffff00ff000000,
|
||||
0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
||||
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00,
|
||||
0x00ffff00ffffffff, 0xffffffff00000000, 0x00ffffff000000ff,
|
||||
0x00ffffff0000ff00, 0xffffffff0000ffff, 0x00ffffff00ff0000,
|
||||
0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
||||
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00,
|
||||
0x00ffffffff00ffff, 0xffffffffffff0000, 0x00ffffffffff00ff,
|
||||
0x00ffffffffffff00, 0xffffffffffffffff,
|
||||
});
|
||||
//#endif
|
||||
|
||||
static dpct::global_memory<const uint8_t, 1>
|
||||
kmask_iq2xs(sycl::range<1>(8), {1, 2, 4, 8, 16, 32, 64, 128});
|
||||
|
||||
template<typename dst_t>
|
||||
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
||||
const sycl::nd_item<3> &item_ct1,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue