cuda : no longer ggml headers last
ggml-ci
This commit is contained in:
parent
1932d614c5
commit
80196bd76c
2 changed files with 7 additions and 9 deletions
|
@ -1,3 +1,7 @@
|
|||
#include "ggml-cuda.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <atomic>
|
||||
|
@ -121,11 +125,6 @@
|
|||
|
||||
#endif // defined(GGML_USE_HIPBLAS)
|
||||
|
||||
// ggml-cuda need half type so keep ggml headers include at last
|
||||
#include "ggml-cuda.h"
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
#define CUDART_HMAX 11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed)
|
||||
|
||||
#define CC_PASCAL 600
|
||||
|
|
|
@ -9498,7 +9498,6 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||
float sumf = 0;
|
||||
|
||||
for (int ib = 0; ib < nb; ib += 2) {
|
||||
|
||||
q4bits.val[0] = vld1q_u8(x[ib+0].qs);
|
||||
q4bits.val[1] = vld1q_u8(x[ib+1].qs);
|
||||
q8b.val[0] = vld1q_s8(y[ib+0].qs);
|
||||
|
@ -9514,9 +9513,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
|
|||
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
|
||||
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
|
||||
|
||||
//sumf += (float)x[ib+0].d * (float)y[ib+0].d * vaddvq_s32(prod_1) + (float)x[ib+1].d * (float)y[ib+1].d * vaddvq_s32(prod_2);
|
||||
sumf += GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) + GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2);
|
||||
|
||||
sumf +=
|
||||
GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) +
|
||||
GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue