ggml : optimize loongarch_asx extend i16,i8,u8 to i32,i16
This commit is contained in:
parent
e6d955ebcd
commit
45aa1db6cf
1 changed files with 3 additions and 18 deletions
|
@ -501,30 +501,15 @@ static __m256i lasx_shuffle_b(__m256i a, __m256i b) {
|
|||
}
|
||||
|
||||
static __m256i lasx_extu8_16(__m128i a) {
|
||||
__m128i zero = __lsx_vldi(0);
|
||||
__m128i vlo = __lsx_vilvl_b(zero, a);
|
||||
__m128i vhi = __lsx_vilvh_b(zero, a);
|
||||
return lasx_set_q(vhi, vlo);
|
||||
return __lasx_vext2xv_hu_bu(____m256i(a));
|
||||
}
|
||||
|
||||
static __m256i lasx_ext8_16(__m128i a) {
|
||||
__m128i sign = __lsx_vslti_b(a, 0);
|
||||
__m128i vlo = __lsx_vilvl_b(sign, a);
|
||||
__m128i vhi = __lsx_vilvh_b(sign, a);
|
||||
return lasx_set_q(vhi, vlo);
|
||||
return __lasx_vext2xv_h_b(____m256i(a));
|
||||
}
|
||||
|
||||
static __m256i lasx_ext16_32(__m128i a) {
|
||||
__m256i tmp1;
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 0), 0);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 1), 1);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 2), 2);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 3), 3);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 4), 4);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 5), 5);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 6), 6);
|
||||
tmp1 = __lasx_xvinsgr2vr_w(tmp1, __lsx_vpickve2gr_h(a, 7), 7);
|
||||
return tmp1;
|
||||
return __lasx_vext2xv_w_h(____m256i(a));
|
||||
}
|
||||
|
||||
static __m128i lasx_extracti128( __m256i a, int pos) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue