ggml, ci : Windows ARM runner and build fixes (#5979)

* windows arm ci

* fix `error C2078: too many initializers` with ggml_vld1q_u32 macro for MSVC ARM64

* fix `warning C4146: unary minus operator applied to unsigned type, result still unsigned`

* fix `error C2065: '__fp16': undeclared identifier`
This commit is contained in:
Michael Podvitskiy 2024-03-11 10:28:51 +01:00 committed by GitHub
parent 332bdfd798
commit 3202361c5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 21 additions and 15 deletions

View file

@@ -9374,15 +9374,15 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
 const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
 idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
-const uint32x4_t aux32x4_0 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
-iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
-const uint32x4_t aux32x4_1 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
-iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
+const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
+iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
+const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
+iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
 idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
-const uint32x4_t aux32x4_2 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
-iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
-const uint32x4_t aux32x4_3 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
-iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
+const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
+iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
+const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
+iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
 vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));