ggml : fix q5_1
commit f6f2ff9557
parent 67b079fc98

1 changed file with 64 additions and 70 deletions
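In outline: ggml_vec_dot_q5_1_q8_1 previously gave every SIMD branch its own loop counter and wrote its result directly to *s, the scalar implementation lived in a separate #else branch, and the NEON path asserted an even block count. This commit hoists a shared block index ib and accumulator sumf to function scope, makes each branch advance ib and store into sumf, and turns the old scalar branch into an unconditional tail loop that finishes whatever blocks the SIMD code left over, followed by a single *s = sumf;. A minimal sketch of the pattern (plain floats stand in for the quantized blocks, and the two-at-a-time loop stands in for the NEON pair loop; this is not the ggml code itself):

/* Sketch: one shared index, one shared accumulator, a fast loop that may
 * stop early, and a scalar tail that finishes the remainder. */
static float dot_sketch(int n, const float * x, const float * y) {
    int   ib   = 0; /* shared element index, advanced by every loop  */
    float sumf = 0; /* shared accumulator, stored to *s exactly once */

    /* "fast" path: two elements per step, like the NEON pair loop;
     * for odd n it stops one element early instead of asserting n%2==0 */
    for (; ib + 1 < n; ib += 2) {
        sumf += x[ib]*y[ib] + x[ib + 1]*y[ib + 1];
    }

    /* scalar tail: picks up whatever the fast path left (0 or 1 here) */
    for (; ib < n; ++ib) {
        sumf += x[ib]*y[ib];
    }

    return sumf;
}

The same shape repeats in every branch of the diff below; only the body of the fast loop differs.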
@@ -4965,6 +4965,9 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     const int qk = QK8_1;
     const int nb = n / qk;
 
+    int ib = 0;
+    float sumf = 0;
+
     assert(n % qk == 0);
     assert(qk == QK5_1);
     assert(nrc == 1);
@@ -4989,13 +4992,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     uint64_t tmp0[4];
     uint64_t tmp1[4];
 
-    assert(nb % 2 == 0); // TODO: handle odd nb
-
-    for (int i = 0; i < nb; i += 2) {
-        const block_q5_1 * restrict x0 = &x[i];
-        const block_q5_1 * restrict x1 = &x[i + 1];
-        const block_q8_1 * restrict y0 = &y[i];
-        const block_q8_1 * restrict y1 = &y[i + 1];
+    for (; ib + 1 < nb; ib += 2) {
+        const block_q5_1 * restrict x0 = &x[ib];
+        const block_q5_1 * restrict x1 = &x[ib + 1];
+        const block_q8_1 * restrict y0 = &y[ib];
+        const block_q8_1 * restrict y1 = &y[ib + 1];
 
         const uint8x16_t m4b = vdupq_n_u8(0x0F);
 
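The new bound ib + 1 < nb is what makes assert(nb % 2 == 0) removable: the pair loop only runs while two full blocks remain, so an odd trailing block falls through to the shared scalar loop at the end of the function. A toy check of the bound (illustrative only):

#include <assert.h>

static void pair_bound_check(void) {
    int nb = 5, ib = 0, pairs = 0;
    for (; ib + 1 < nb; ib += 2) { pairs++; }
    assert(pairs == 2 && ib == 4); /* nb - ib == 1 block left for the tail */
}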
@@ -5050,7 +5051,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
                         ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
     }
 
-    *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1;
+    sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1;
 #elif defined(__wasm_simd128__)
     v128_t sumv = wasm_f32x4_splat(0.0f);
 
@@ -5060,9 +5061,9 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     uint64_t tmp[4];
 
     // TODO: check if unrolling this is better
-    for (int i = 0; i < nb; ++i) {
-        const block_q5_1 * restrict x0 = &x[i];
-        const block_q8_1 * restrict y0 = &y[i];
+    for (; ib < nb; ++ib) {
+        const block_q5_1 * restrict x0 = &x[ib];
+        const block_q8_1 * restrict y0 = &y[ib];
 
         summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
 
@@ -5114,7 +5115,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
                                       wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d))));
     }
 
-    *s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
+    sumf = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
          wasm_f32x4_extract_lane(sumv, 2) + wasm_f32x4_extract_lane(sumv, 3) + summs;
 #elif defined(__AVX2__)
     // Initialize accumulator with zeros
@@ -5123,25 +5124,25 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     float summs = 0.0f;
 
     // Main loop
-    for (int i = 0; i < nb; i++) {
-        const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
+    for (; ib < nb; ++ib) {
+        const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d));
 
-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
+        summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s);
 
-        __m256i qx = bytes_from_nibbles_32(x[i].qs);
-        __m256i bxhi = bytes_from_bits_32(x[i].qh);
+        __m256i qx = bytes_from_nibbles_32(x[ib].qs);
+        __m256i bxhi = bytes_from_bits_32(x[ib].qh);
         bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
         qx = _mm256_or_si256(qx, bxhi);
 
-        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
-        const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[ib].d));
+        const __m256i qy = _mm256_loadu_si256((const __m256i *)y[ib].qs);
 
         const __m256 q = mul_sum_us8_pairs_float(qx, qy);
 
         acc = _mm256_fmadd_ps(q, _mm256_mul_ps(dx, dy), acc);
     }
 
-    *s = hsum_float_8(acc) + summs;
+    sumf = hsum_float_8(acc) + summs;
 #elif defined(__AVX__)
     // Initialize accumulator with zeros
     __m256 acc = _mm256_setzero_ps();
@@ -5150,13 +5151,13 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     float summs = 0.0f;
 
     // Main loop
-    for (int i = 0; i < nb; i++) {
-        const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
+    for (; ib < nb; ++ib) {
+        const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d));
 
-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
+        summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s);
 
-        __m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
-        const __m256i bxhi = bytes_from_bits_32(x[i].qh);
+        __m256i bx_0 = bytes_from_nibbles_32(x[ib].qs);
+        const __m256i bxhi = bytes_from_bits_32(x[ib].qh);
         __m128i bxhil = _mm256_castsi256_si128(bxhi);
         __m128i bxhih = _mm256_extractf128_si256(bxhi, 1);
         bxhil = _mm_and_si128(bxhil, mask);
@@ -5167,15 +5168,15 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
         bxh = _mm_or_si128(bxh, bxhih);
         bx_0 = MM256_SET_M128I(bxh, bxl);
 
-        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
-        const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[ib].d));
+        const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[ib].qs);
 
         const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0);
 
         acc = _mm256_add_ps(_mm256_mul_ps(q, _mm256_mul_ps(dx, dy)), acc);
     }
 
-    *s = hsum_float_8(acc) + summs;
+    sumf = hsum_float_8(acc) + summs;
 #elif defined(__riscv_v_intrinsic)
     float sumf = 0.0;
 
@@ -5187,8 +5188,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
     vuint32m2_t vt_2 = __riscv_vadd_vx_u32m2(vt_1, 12, vl);
 
-    for (int i = 0; i < nb; i++) {
-        memcpy(&qh, x[i].qh, sizeof(uint32_t));
+    for (; ib < nb; ++ib) {
+        memcpy(&qh, x[ib].qh, sizeof(uint32_t));
 
         // load qh
         vuint32m2_t vqh = __riscv_vmv_v_x_u32m2(qh, vl);
@@ -5210,10 +5211,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
         vuint8mf2_t xh_1 = __riscv_vncvt_x_x_w_u8mf2(xhc_1, vl);
 
         // load
-        vuint8mf2_t tx = __riscv_vle8_v_u8mf2(x[i].qs, vl);
+        vuint8mf2_t tx = __riscv_vle8_v_u8mf2(x[ib].qs, vl);
 
-        vint8mf2_t y0 = __riscv_vle8_v_i8mf2(y[i].qs, vl);
-        vint8mf2_t y1 = __riscv_vle8_v_i8mf2(y[i].qs+16, vl);
+        vint8mf2_t y0 = __riscv_vle8_v_i8mf2(y[ib].qs, vl);
+        vint8mf2_t y1 = __riscv_vle8_v_i8mf2(y[ib].qs+16, vl);
 
         vuint8mf2_t x_at = __riscv_vand_vx_u8mf2(tx, 0x0F, vl);
         vuint8mf2_t x_lt = __riscv_vsrl_vx_u8mf2(tx, 0x04, vl);
@@ -5234,11 +5235,9 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
 
         int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
 
-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
+        sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
     }
 
-    *s = sumf;
-
 #elif defined(__POWER9_VECTOR__)
     const vector signed char lowMask = vec_splats((signed char)0xF);
     const vector signed int v0 = vec_splats((int32_t)0);
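The per-block reduction rewritten in this hunk is the same in every branch: sumf += (dx*dy)*sumi + mx*sy. It follows directly from the affine encodings: q5_1 stores x_j = dx*qx_j + mx, and q8_1 stores y_j = dy*qy_j together with the precomputed s = dy * sum(qy_j), so sum_j x_j*y_j = dx*dy*sumi + mx*sy with sumi = sum_j qx_j*qy_j. A small self-check with made-up values (not ggml code):

#include <stdio.h>

int main(void) {
    const float dx = 0.5f, mx = -1.0f, dy = 0.25f;            /* made-up scales */
    const int   qx[4] = {1, 7, 19, 30}, qy[4] = {-3, 8, 0, 5};
    float ref = 0, sy = 0; int sumi = 0;
    for (int j = 0; j < 4; ++j) {
        ref  += (dx*qx[j] + mx) * (dy*qy[j]); /* dequantized dot product */
        sumi += qx[j]*qy[j];
        sy   += dy*qy[j];                     /* q8_1 precomputes this   */
    }
    printf("%f == %f\n", ref, dx*dy*sumi + mx*sy); /* both print 22.875 */
    return 0;
}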
@@ -5247,31 +5246,31 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     vector float vsumf0 = vec_splats(0.0f);
 
 #pragma GCC unroll 4
-    for (int i = 0; i < nb; ++i) {
-        __builtin_prefetch(x[i].qs, 0, 1);
-        __builtin_prefetch(y[i].qs, 0, 1);
+    for (; ib < nb; ++ib) {
+        __builtin_prefetch(x[ib].qs, 0, 1);
+        __builtin_prefetch(y[ib].qs, 0, 1);
 
-        vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d));
-        vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[i].d));
+        vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d));
+        vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d));
         vector float vd = vec_mul(vxd, vyd);
 
-        vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[i].m));
-        vector float vys = {GGML_FP16_TO_FP32(y[i].s), 0.f, 0.f, 0.f};
+        vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[ib].m));
+        vector float vys = {GGML_FP16_TO_FP32(y[ib].s), 0.f, 0.f, 0.f};
         vsumf0 = vec_madd(vxmin, vys, vsumf0);
 
-        vector unsigned long long aux64x2_0 = {(uint64_t)(table_b2b_0[x[i].qh[0]]), (uint64_t)(table_b2b_0[x[i].qh[1]])};
-        vector unsigned long long aux64x2_1 = {(uint64_t)(table_b2b_0[x[i].qh[2]]), (uint64_t)(table_b2b_0[x[i].qh[3]])};
+        vector unsigned long long aux64x2_0 = {(uint64_t)(table_b2b_0[x[ib].qh[0]]), (uint64_t)(table_b2b_0[x[ib].qh[1]])};
+        vector unsigned long long aux64x2_1 = {(uint64_t)(table_b2b_0[x[ib].qh[2]]), (uint64_t)(table_b2b_0[x[ib].qh[3]])};
 
         vector signed char qh0 = (vector signed char)aux64x2_0;
         vector signed char qh1 = (vector signed char)aux64x2_1;
 
-        vector signed char qxs = (vector signed char)vec_xl( 0, x[i].qs);
+        vector signed char qxs = (vector signed char)vec_xl( 0, x[ib].qs);
 
         vector unsigned char q5x0 = (vector unsigned char)vec_or(vec_and(qxs, lowMask), qh0);
         vector unsigned char q5x1 = (vector unsigned char)vec_or(vec_sr(qxs, v4), qh1);
 
-        vector signed char q8y0 = vec_xl( 0, y[i].qs);
-        vector signed char q8y1 = vec_xl( 16, y[i].qs);
+        vector signed char q8y0 = vec_xl( 0, y[ib].qs);
+        vector signed char q8y1 = vec_xl( 16, y[ib].qs);
 
         vector signed int vsumi0 = v0;
 
@@ -5284,7 +5283,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     vsumf0 = vec_add(vsumf0, vec_sld(vsumf0, vsumf0, 4));
     vsumf0 = vec_add(vsumf0, vec_sld(vsumf0, vsumf0, 8));
 
-    *s = vec_extract(vsumf0, 0);
+    sumf = vec_extract(vsumf0, 0);
 
 #elif defined(__loongarch_asx)
     // Initialize accumulator with zeros
@@ -5293,33 +5292,29 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
     float summs = 0.0f;
 
     // Main loop
-    for (int i = 0; i < nb; i++) {
-        const __m256 dx = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[i].d));
+    for (; ib < nb; ++ib) {
+        const __m256 dx = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ib].d));
 
-        summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
+        summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s);
 
-        __m256i qx = bytes_from_nibbles_32(x[i].qs);
-        __m256i bxhi = bytes_from_bits_32(x[i].qh);
+        __m256i qx = bytes_from_nibbles_32(x[ib].qs);
+        __m256i bxhi = bytes_from_bits_32(x[ib].qh);
         bxhi = __lasx_xvand_v(bxhi, __lasx_xvreplgr2vr_b(0x10));
         qx = __lasx_xvor_v(qx, bxhi);
 
-        const __m256 dy = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[i].d));
-        const __m256i qy = __lasx_xvld((const __m256i *)y[i].qs, 0);
+        const __m256 dy = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[ib].d));
+        const __m256i qy = __lasx_xvld((const __m256i *)y[ib].qs, 0);
 
         const __m256 q = mul_sum_us8_pairs_float(qx, qy);
 
         acc = __lasx_xvfmadd_s(q, __lasx_xvfmul_s(dx, dy), acc);
     }
 
-    *s = hsum_float_8(acc) + summs;
-
-#else
-    // scalar
-    float sumf = 0.0;
-
-    for (int i = 0; i < nb; i++) {
+    sumf = hsum_float_8(acc) + summs;
+#endif
+    for (; ib < nb; ++ib) {
         uint32_t qh;
-        memcpy(&qh, x[i].qh, sizeof(qh));
+        memcpy(&qh, x[ib].qh, sizeof(qh));
 
         int sumi = 0;
 
@@ -5327,17 +5322,16 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
             const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
             const uint8_t xh_1 = ((qh >> (j + 12))     ) & 0x10;
 
-            const int32_t x0 = (x[i].qs[j] & 0xF) | xh_0;
-            const int32_t x1 = (x[i].qs[j] >> 4) | xh_1;
+            const int32_t x0 = (x[ib].qs[j] & 0xF) | xh_0;
+            const int32_t x1 = (x[ib].qs[j] >> 4) | xh_1;
 
-            sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
+            sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
         }
 
-        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
+        sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
     }
 
     *s = sumf;
-#endif
 }
 
 void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
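For reference, the shifts kept in the scalar tail rebuild the 5-bit quants from the q5_1 layout: quant j keeps its low four bits in the nibble qs[j] & 0xF (quants 0..15) or qs[j] >> 4 (quants 16..31), and the fifth bits are packed into the 32-bit qh, bit j for quant j and bit j+16 for quant j+16. Both ((qh >> (j + 0)) << 4) & 0x10 and ((qh >> (j + 12))) & 0x10 simply move the relevant qh bit into bit position 4. A round-trip check with a toy packing (this is not ggml's quantization code):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t qh = 0;
    uint8_t  qs[16] = {0};

    /* pack quant 3 = 21 (0b10101): low nibble into qs, fifth bit into qh */
    qs[3] = 21 & 0xF;
    qh   |= (uint32_t)((21 >> 4) & 1) << 3;

    /* unpack with the same expressions as the kernel's tail loop */
    const int     j    = 3;
    const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10; /* qh bit j -> bit 4 */
    const int32_t x0   = (qs[j] & 0xF) | xh_0;

    assert(x0 == 21); /* the 5-bit value survives the round trip */
    return 0;
}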