cosmopolitan/third_party/ggml/fp16.c

139 lines
5.8 KiB
C
Raw Normal View History

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
GGML
Copyright (c) 2023 Georgi Gerganov
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "third_party/ggml/fp16.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "third_party/ggml/fp16.internal.h"
#include "third_party/libcxx/math.h"
asm(".ident\t\"\\n\\n\
GGML (MIT License)\\n\
Copyright (c) 2023 Georgi Gerganov\"");
asm(".include \"libc/disclaimer.inc\"");
// clang-format off
#if defined(__ARM_NEON) || defined(__wasm_simd128__)
#define B1(c,s,n) 0x ## n ## c , 0x ## n ## s
#define B2(c,s,n) B1(c,s,n ## c), B1(c,s,n ## s)
#define B3(c,s,n) B2(c,s,n ## c), B2(c,s,n ## s)
#define B4(c,s,n) B3(c,s,n ## c), B3(c,s,n ## s)
#define B5(c,s,n) B4(c,s,n ## c), B4(c,s,n ## s)
#define B6(c,s,n) B5(c,s,n ## c), B5(c,s,n ## s)
#define B7(c,s,n) B6(c,s,n ## c), B6(c,s,n ## s)
#define B8(c,s ) B7(c,s, c), B7(c,s, s)
2023-05-13 15:19:44 +00:00
// precomputed tables for expanding 8bits to 8 bytes:
const uint64_t table_b2b_0[1 << 8] = { B8(00, 10) }; // ( b) << 4
const uint64_t table_b2b_1[1 << 8] = { B8(10, 00) }; // (!b) << 4
#endif
//
// global data
//
// precomputed gelu table for f16 (128 KB)
ggml_fp16_t table_gelu_f16[1 << 16];
// precomputed silu table for f16 (128 KB)
ggml_fp16_t table_silu_f16[1 << 16];
// precomputed exp table for f16 (128 KB)
ggml_fp16_t table_exp_f16[1 << 16];
// precomputed f32 table for f16 (256 KB)
float table_f32_f16[1 << 16];
// note: do not use these inside ggml.c
// these are meant to be used via the ggml.h API
float ggml_fp16_to_fp32(ggml_fp16_t x) {
return (float) GGML_FP16_TO_FP32(x);
}
ggml_fp16_t ggml_fp32_to_fp16(float x) {
return GGML_FP32_TO_FP16(x);
}
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n) {
size_t i = 0;
#ifdef __F16C__
for (; i + 7 < n; i += 8) {
__m128i x_vec = _mm_loadu_si128((const __m128i *)(x + i));
__m256 y_vec = _mm256_cvtph_ps(x_vec);
_mm256_storeu_ps(y + i, y_vec);
}
#endif
for (; i < n; i++) {
y[i] = GGML_FP16_TO_FP32(x[i]);
}
}
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n) {
size_t i = 0;
2023-05-15 23:32:10 +00:00
#ifdef __F16C__
for (; i + 7 < n; i += 8) {
__m256 x_vec = _mm256_loadu_ps(x + i);
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
_mm_storeu_si128((__m128i *)(y + i), y_vec);
}
for(; i + 3 < n; i += 4) {
__m128 x_vec = _mm_loadu_ps(x + i);
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
_mm_storel_epi64((__m128i *)(y + i), y_vec);
}
#endif
for (; i < n; i++) {
y[i] = GGML_FP32_TO_FP16(x[i]);
}
}
static const float GELU_COEF_A = 0.044715f;
static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
inline static float ggml_gelu_f32(float x) {
return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
}
// Sigmoid Linear Unit (SiLU) function
inline static float ggml_silu_f32(float x) {
return x/(1.0f + expf(-x));
}
void ggml_fp16_init(void) {
ggml_fp16_t ii;
ftrace_enabled(-1);
for (int i = 0; i < (1 << 16); ++i) {
uint16_t ui = i;
memcpy(&ii, &ui, sizeof(ii));
const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
}
ftrace_enabled(+1);
}