parent
101f7a6e73
commit
5070815dcf
2 changed files with 23 additions and 0 deletions
|
@ -457,6 +457,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
{
|
||||
printf("\rGenerating (%d / %d tokens)", (1 + params.n_predict - remaining_tokens), params.n_predict);
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
bool evalres = false;
|
||||
|
||||
|
|
|
@ -787,8 +787,30 @@ void dequantize_row_q4_1(const void * restrict x, float * restrict y, int k) {
|
|||
// F32Cx8 vector abstraction for the AVX path: the vector type is a plain
// __m256, ZERO yields an all-zero vector and SET1 broadcasts one float to
// every lane.
#define GGML_V1_F32Cx8 __m256
|
||||
#define GGML_V1_F32Cx8_ZERO _mm256_setzero_ps()
|
||||
#define GGML_V1_F32Cx8_SET1(x) _mm256_set1_ps(x)
|
||||
// When the compiler advertises F16C, LOAD/STORE map directly onto the
// hardware conversion intrinsics: an unaligned 128-bit load followed by
// _mm256_cvtph_ps, and _mm256_cvtps_ph followed by an unaligned 128-bit store.
// NOTE(review): the literal 0 passed to _mm256_cvtps_ph is the rounding
// control immediate; presumably round-to-nearest -- confirm against the
// intrinsics reference.
#if defined(__F16C__)
|
||||
#define GGML_V1_F32Cx8_LOAD(x) _mm256_cvtph_ps(_mm_loadu_si128((__m128i *)(x)))
|
||||
#define GGML_V1_F32Cx8_STORE(x, y) _mm_storeu_si128((__m128i *)(x), _mm256_cvtps_ph(y, 0))
|
||||
#else
|
||||
// Fallback load used when __F16C__ is not defined: reads eight
// ggml_v1_fp16_t elements starting at x, converts each one to float with
// GGML_V1_FP16_TO_FP32, and returns the eight floats packed in a __m256.
//
// x: pointer to at least eight readable ggml_v1_fp16_t elements (the loop
//    indexes x[0] .. x[7]); the function only reads through it.
// Returns: a __m256 whose lanes 0..7 hold the converted x[0]..x[7].
//
// NOTE(review): identifiers beginning with a double underscore are reserved
// for the implementation in C; consider renaming together with the macro
// that references this helper.
// NOTE(review): x is never written, so the parameter could be
// const-qualified -- confirm no caller relies on the current type.
static inline __m256 __avx_f32cx8_load(ggml_v1_fp16_t *x) {
|
||||
// Scratch buffer holding the eight converted floats.
float tmp[8];
|
||||
|
||||
// Convert element by element in software.
for (int i = 0; i < 8; i++)
|
||||
tmp[i] = GGML_V1_FP16_TO_FP32(x[i]);
|
||||
|
||||
// Unaligned load: tmp carries no explicit alignment.
return _mm256_loadu_ps(tmp);
|
||||
}
|
||||
// Fallback store used when __F16C__ is not defined: spills the eight float
// lanes of y to a temporary array, converts each one with
// GGML_V1_FP32_TO_FP16, and writes the results to x[0] .. x[7].
//
// x: destination; must have room for eight ggml_v1_fp16_t elements.
// y: vector of eight floats to convert and store.
//
// NOTE(review): identifiers beginning with a double underscore are reserved
// for the implementation in C; consider renaming together with the macro
// that references this helper.
static inline void __avx_f32cx8_store(ggml_v1_fp16_t *x, __m256 y) {
|
||||
// Scratch buffer receiving the eight float lanes of y.
float arr[8];
|
||||
|
||||
// Unaligned store: arr carries no explicit alignment.
_mm256_storeu_ps(arr, y);
|
||||
|
||||
// Convert element by element in software.
for (int i = 0; i < 8; i++)
|
||||
x[i] = GGML_V1_FP32_TO_FP16(arr[i]);
|
||||
}
|
||||
// Without F16C, route LOAD/STORE through the software conversion helpers.
#define GGML_V1_F32Cx8_LOAD(x) __avx_f32cx8_load(x)
|
||||
#define GGML_V1_F32Cx8_STORE(x, y) __avx_f32cx8_store(x, y)
|
||||
#endif
|
||||
|
||||
// Arithmetic on F32Cx8 vectors: FMA reuses the GGML_V1_F32x8_FMA definition
// (defined outside this excerpt), ADD and MUL are the plain __m256 float
// intrinsics.
#define GGML_V1_F32Cx8_FMA GGML_V1_F32x8_FMA
|
||||
#define GGML_V1_F32Cx8_ADD _mm256_add_ps
|
||||
#define GGML_V1_F32Cx8_MUL _mm256_mul_ps
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue