k-quants : fix build on armv7 (android only) (#2920)
* k-quants : fix build on armv7 * ggml : cleanup unused arm32 specific impl * k-quants : avoid some unused vzero / mzero define * ggml-alloc : use 4g for MEASURE_MAX_SIZE in 32-bit arm
This commit is contained in:
parent
571083f508
commit
21f3d1be86
3 changed files with 42 additions and 51 deletions
46
ggml.c
46
ggml.c
|
@ -817,46 +817,6 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
|
|||
|
||||
#if !defined(__aarch64__)
|
||||
|
||||
inline static uint16_t vaddvq_u8(uint8x16_t v) {
|
||||
return
|
||||
(uint16_t)vgetq_lane_u8(v, 0) + (uint16_t)vgetq_lane_u8(v, 1) +
|
||||
(uint16_t)vgetq_lane_u8(v, 2) + (uint16_t)vgetq_lane_u8(v, 3) +
|
||||
(uint16_t)vgetq_lane_u8(v, 4) + (uint16_t)vgetq_lane_u8(v, 5) +
|
||||
(uint16_t)vgetq_lane_u8(v, 6) + (uint16_t)vgetq_lane_u8(v, 7) +
|
||||
(uint16_t)vgetq_lane_u8(v, 8) + (uint16_t)vgetq_lane_u8(v, 9) +
|
||||
(uint16_t)vgetq_lane_u8(v, 10) + (uint16_t)vgetq_lane_u8(v, 11) +
|
||||
(uint16_t)vgetq_lane_u8(v, 12) + (uint16_t)vgetq_lane_u8(v, 13) +
|
||||
(uint16_t)vgetq_lane_u8(v, 14) + (uint16_t)vgetq_lane_u8(v, 15);
|
||||
}
|
||||
|
||||
inline static int16_t vaddvq_s8(int8x16_t v) {
|
||||
return
|
||||
(int16_t)vgetq_lane_s8(v, 0) + (int16_t)vgetq_lane_s8(v, 1) +
|
||||
(int16_t)vgetq_lane_s8(v, 2) + (int16_t)vgetq_lane_s8(v, 3) +
|
||||
(int16_t)vgetq_lane_s8(v, 4) + (int16_t)vgetq_lane_s8(v, 5) +
|
||||
(int16_t)vgetq_lane_s8(v, 6) + (int16_t)vgetq_lane_s8(v, 7) +
|
||||
(int16_t)vgetq_lane_s8(v, 8) + (int16_t)vgetq_lane_s8(v, 9) +
|
||||
(int16_t)vgetq_lane_s8(v, 10) + (int16_t)vgetq_lane_s8(v, 11) +
|
||||
(int16_t)vgetq_lane_s8(v, 12) + (int16_t)vgetq_lane_s8(v, 13) +
|
||||
(int16_t)vgetq_lane_s8(v, 14) + (int16_t)vgetq_lane_s8(v, 15);
|
||||
}
|
||||
|
||||
inline static int32_t vaddvq_s16(int16x8_t v) {
|
||||
return
|
||||
(int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
|
||||
(int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
|
||||
(int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
|
||||
(int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
|
||||
}
|
||||
|
||||
inline static uint32_t vaddvq_u16(uint16x8_t v) {
|
||||
return
|
||||
(uint32_t)vgetq_lane_u16(v, 0) + (uint32_t)vgetq_lane_u16(v, 1) +
|
||||
(uint32_t)vgetq_lane_u16(v, 2) + (uint32_t)vgetq_lane_u16(v, 3) +
|
||||
(uint32_t)vgetq_lane_u16(v, 4) + (uint32_t)vgetq_lane_u16(v, 5) +
|
||||
(uint32_t)vgetq_lane_u16(v, 6) + (uint32_t)vgetq_lane_u16(v, 7);
|
||||
}
|
||||
|
||||
inline static int32_t vaddvq_s32(int32x4_t v) {
|
||||
return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
|
||||
}
|
||||
|
@ -865,12 +825,6 @@ inline static float vaddvq_f32(float32x4_t v) {
|
|||
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
||||
}
|
||||
|
||||
inline static float vminvq_f32(float32x4_t v) {
|
||||
return
|
||||
MIN(MIN(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
|
||||
MIN(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
|
||||
}
|
||||
|
||||
inline static float vmaxvq_f32(float32x4_t v) {
|
||||
return
|
||||
MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue