Rebase to the latest upstream

This commit is contained in:
Charles Xu 2024-09-25 13:58:08 +02:00
parent 7276e2b31c
commit 900e27f026

View file

@ -2432,6 +2432,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
}
#endif // #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8)
#elif defined(__AVX2__) || defined(__AVX512F__)
{
const block_q4_0x8 * b_ptr_start = (const block_q4_0x8 *)vx;
const block_q8_0x4 * a_ptr_start = (const block_q8_0x4 *)vy;
int64_t b_nb = n / QK4_0;
@ -2446,7 +2447,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
__m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
int64_t xstart = 0;
int anr = nr - nr%16; // Used to align nr with boundary of 16
#ifdef __AVX512F__
#ifdef __AVX512F__
int anc = nc - nc%16; // Used to align nc with boundary of 16
// Mask to mask out nibbles from packed bytes expanded to 512 bit length
const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
@ -2845,7 +2846,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
xstart = anc/8;
y = 0;
}
#endif // __AVX512F__
#endif // __AVX512F__
// Take group of four block_q8_0x4 structures at each pass of the loop and perform dot product operation
@ -3180,6 +3181,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
}
}
return;
}
#endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__)
float sumf[4][8];
int sumi;