From 180bfcd8d5c82c0b2fe06bc7538683c28a3790e9 Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Fri, 26 Apr 2024 02:33:26 -0700
Subject: [PATCH] Minimize the GGML API surface area for BF16

---
 ggml-impl.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++++--
 ggml.c      | 20 +++++++++---
 ggml.h      | 92 +++++------------------------------------------------
 3 files changed, 107 insertions(+), 92 deletions(-)

diff --git a/ggml-impl.h b/ggml-impl.h
index 3e7484d29..83c32c743 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -17,6 +17,90 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/**
+ * Google Brain 16-bit floating point number.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───┐
+ *     0b0000000000000000 brain16
+ *
+ * Since bf16 has the same number of exponent bits as a 32-bit float,
+ * encoding and decoding numbers becomes relatively straightforward.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───────────────────┐
+ *     0b00000000000000000000000000000000 IEEE binary32
+ *
+ * For comparison, the standard fp16 format has fewer exponent bits.
+ *
+ *       ┌sign
+ *       │
+ *       │  ┌exponent
+ *       │  │
+ *       │  │    ┌mantissa
+ *       │  │    │
+ *       │┌─┴─┐┌─┴──────┐
+ *     0b0000000000000000 IEEE binary16
+ *
+ * So be warned that converting between them destroys several bits.
+ *
+ * @see IEEE 754-2008
+ */
+struct ggml_bf16_s {
+    uint16_t bits;
+};
+
+/**
+ * Converts brain16 to float32.
+ */
+static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = (uint32_t)h.bits << 16;
+    return u.f;
+}
+
+/**
+ * Converts float32 to brain16.
+ *
+ * This function is bit-identical to the AMD Zen4 VCVTNEPS2BF16 instruction.
+ * Subnormals shall be flushed to zero, and NaNs will be quiet.
+ * This code should vectorize nicely if using modern compilers.
+ */
+static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
+    ggml_bf16_t h;
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = s;
+    if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
+        h.bits = (u.i >> 16) | 64; /* force to quiet */
+        return h;
+    }
+    if (!(u.i & 0x7f800000)) { /* subnormal */
+        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
+        return h;
+    }
+    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16; /* round to nearest even */
+    return h;
+}
+
+#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
+#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -518,9 +602,6 @@ size_t ggml_hash_insert     (      struct ggml_hash_set hash_set, struct ggml
 // return index, asserts if table is full
 size_t ggml_hash_find_or_insert(      struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
-#define GGML_FP32_TO_BF16(x) ggml_fp32_to_bf16(x)
-#define GGML_BF16_TO_FP32(x) ggml_bf16_to_fp32(x)
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml.c b/ggml.c
index 60b0e7b6c..64868213c 100644
--- a/ggml.c
+++ b/ggml.c
@@ -339,16 +339,26 @@ GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
     return "GGML status: unknown";
 }
 
-// note: do not use these inside ggml.c
-// these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
+#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
     return GGML_FP16_TO_FP32(x);
 }
 
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
+#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
     return GGML_FP32_TO_FP16(x);
 }
 
+float ggml_bf16_to_fp32(ggml_bf16_t x) {
+#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
+    return GGML_BF16_TO_FP32(x); // it just left shifts
+}
+
+ggml_bf16_t ggml_fp32_to_bf16(float x) {
+#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
+    return GGML_FP32_TO_BF16(x);
+}
+
 void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
     for (int64_t i = 0; i < n; i++) {
         y[i] = GGML_FP16_TO_FP32(x[i]);
@@ -374,8 +384,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
     }
 }
 
-void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
-    int i = 0;
+void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
+    int64_t i = 0;
 #if defined(__AVX512F__)
     for (; i + 16 <= n; i += 16) {
         _mm512_storeu_ps(y + i,
@@ -402,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
     }
 }
 
-void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n) {
+void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
     for (; i + 32 <= n; i += 32) {
diff --git a/ggml.h b/ggml.h
index 63e8d6b21..a422e0df0 100644
--- a/ggml.h
+++ b/ggml.h
@@ -335,6 +335,14 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
 
+    // bfloat16
+    struct ggml_bf16_s;
+    typedef struct ggml_bf16_s ggml_bf16_t;
+    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
+    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
+    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
+
     struct ggml_object;
     struct ggml_context;
 
@@ -2392,90 +2400,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_vsx        (void);
     GGML_API int ggml_cpu_has_matmul_int8(void);
 
-    /**
-     * Google Brain 16-bit floating point number.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───┐
-     *     0b0000000000000000 brain16
-     *
-     * Since bf16 has the same number of exponent bits as a 32bit float,
-     * encoding and decoding numbers becomes relatively straightforward.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───────────────────┐
-     *     0b00000000000000000000000000000000 IEEE binary32
-     *
-     * For comparison, the standard fp16 format has fewer exponent bits.
-     *
-     *       ┌sign
-     *       │
-     *       │  ┌exponent
-     *       │  │
-     *       │  │    ┌mantissa
-     *       │  │    │
-     *       │┌─┴─┐┌─┴──────┐
-     *     0b0000000000000000 IEEE binary16
-     *
-     * So be warned that converting between them, destroys several bits.
-     *
-     * @see IEEE 754-2008
-     */
-    typedef struct {
-        uint16_t x;
-    } ggml_bf16_t;
-
-    /**
-     * Converts brain16 to float32.
-     */
-    static inline float ggml_bf16_to_fp32(ggml_bf16_t h) {
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.i = (uint32_t)h.x << 16;
-        return u.f;
-    }
-
-    /**
-     * Converts float32 to brain16.
-     *
-     * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
-     * Subnormals shall be flushed to zero, and NANs will be quiet.
-     * This code should vectorize nicely if using modern compilers.
-     */
-    static inline ggml_bf16_t ggml_fp32_to_bf16(float s) {
-        ggml_bf16_t h;
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.f = s;
-        if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
-            h.x = (u.i >> 16) | 64; /* force to quiet */
-            return h;
-        }
-        if (!(u.i & 0x7f800000)) { /* subnormal */
-            h.x = (u.i & 0x80000000) >> 16; /* flush to zero */
-            return h;
-        }
-        h.x = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
-        return h;
-    }
-
-    GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n);
-    GGML_API void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n);
-
     //
     // Internal types and functions exposed for tests and benchmarks
     //
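
The rounding expression in ggml_compute_fp32_to_bf16 adds 0x7fff plus the lowest
bit that survives truncation, which implements round-to-nearest with ties-to-even.
The standalone program below is a sketch, not part of the patch: it mirrors the
logic added to ggml-impl.h under local names (bf16_t, fp32_to_bf16, bf16_to_fp32)
and prints a few conversions to show the rounding, flush-to-zero, and NaN
quieting behavior.

/*
 * bf16_demo.c - standalone sketch mirroring the conversion logic above.
 * All names here are local to this example; they are not ggml API.
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint16_t bits; } bf16_t;   /* stand-in for ggml_bf16_t */

static bf16_t fp32_to_bf16(float s) {
    bf16_t h;
    union { float f; uint32_t i; } u;
    u.f = s;
    if ((u.i & 0x7fffffff) > 0x7f800000) {  /* NaN: force a surviving mantissa bit */
        h.bits = (u.i >> 16) | 64;          /* so truncation can't yield infinity  */
        return h;
    }
    if (!(u.i & 0x7f800000)) {              /* subnormal: flush to signed zero */
        h.bits = (u.i & 0x80000000) >> 16;
        return h;
    }
    /* Add 0x7fff plus the lowest surviving mantissa bit, so exact ties
     * round toward an even result (round-to-nearest-even). */
    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
    return h;
}

static float bf16_to_fp32(bf16_t h) {
    union { float f; uint32_t i; } u;
    u.i = (uint32_t)h.bits << 16;           /* decoding is just a left shift */
    return u.f;
}

int main(void) {
    const float in[4] = { 1.0f, 3.14159265f, 1e-40f, NAN };
    for (int j = 0; j < 4; j++) {
        bf16_t h = fp32_to_bf16(in[j]);
        printf("%-12g -> 0x%04x -> %g\n",
               (double)in[j], (unsigned)h.bits, (double)bf16_to_fp32(h));
    }
    return 0;
}

Expected behavior: 1.0 maps to 0x3f80 and round-trips exactly; 3.14159265
becomes 0x4049 and reads back as 3.140625, since only 8 mantissa bits survive;
1e-40 is subnormal in binary32 and flushes to +0; and NaN stays a quiet NaN.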
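Two consequences of the patch are worth spelling out. Inside ggml.c, each scalar
conversion redefines its own name right after its definition (e.g. to
do_not_use__ggml_bf16_to_fp32__in_ggml), so any later call within ggml.c fails to
compile with a self-explanatory identifier, steering internal code to the
inlinable GGML_BF16_TO_FP32 macro. And since struct ggml_bf16_s is now defined
only in ggml-impl.h, ggml_bf16_t is an incomplete type for code that includes
just ggml.h: callers can hold and pass pointers but can no longer poke at the
bits member, which is exactly the surface-area reduction the patch is after.
Row-wise conversion through the remaining public API works on pointers alone.
A hypothetical caller-side helper, to illustrate (the name bf16_row_to_f32 and
the tensor-layout assumptions are this example's, not part of ggml):

/* Convert one row of a tensor holding bf16 data to float, using only the
 * declarations that ggml.h still exposes. Assumes row stride t->nb[1] and
 * row length t->ne[0], as for a contiguous 2-D GGML tensor. */
#include "ggml.h"

void bf16_row_to_f32(const struct ggml_tensor * t, int64_t row, float * dst) {
    const ggml_bf16_t * src =
        (const ggml_bf16_t *)((const char *)t->data + row*t->nb[1]);
    ggml_bf16_to_fp32_row(src, dst, t->ne[0]);
}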