Minimize the GGML API surface area for BF16
commit 180bfcd8d5 (parent 823d45ad71)

3 changed files with 107 additions and 92 deletions
ggml-impl.h | 87

@@ -17,6 +17,90 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/**
+ * Google Brain 16-bit floating point number.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───┐
+ *     0b0000000000000000 brain16
+ *
+ * Since bf16 has the same number of exponent bits as a 32bit float,
+ * encoding and decoding numbers becomes relatively straightforward.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───────────────────┐
+ *     0b00000000000000000000000000000000 IEEE binary32
+ *
+ * For comparison, the standard fp16 format has fewer exponent bits.
+ *
+ *       ┌sign
+ *       │
+ *       │  ┌exponent
+ *       │  │
+ *       │  │    ┌mantissa
+ *       │  │    │
+ *       │┌─┴─┐┌─┴──────┐
+ *     0b0000000000000000 IEEE binary16
+ *
+ * So be warned that converting between them destroys several bits.
+ *
+ * @see IEEE 754-2008
+ */
+struct ggml_bf16_s {
+    uint16_t bits;
+};
+
+/**
+ * Converts brain16 to float32.
+ */
+static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = (uint32_t)h.bits << 16;
+    return u.f;
+}
+
+/**
+ * Converts float32 to brain16.
+ *
+ * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
+ * Subnormals shall be flushed to zero, and NANs will be quiet.
+ * This code should vectorize nicely if using modern compilers.
+ */
+static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
+    ggml_bf16_t h;
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = s;
+    if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
+        h.bits = (u.i >> 16) | 64; /* force to quiet */
+        return h;
+    }
+    if (!(u.i & 0x7f800000)) { /* subnormal */
+        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
+        return h;
+    }
+    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
+    return h;
+}
+
+#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
+#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
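The final rounding line adds 0x7fff plus the lowest kept bit before truncating, which implements round-to-nearest with ties-to-even. A standalone sketch (a hypothetical test file, not part of the commit) that mirrors the logic of ggml_compute_fp32_to_bf16 on raw bit patterns, without depending on the ggml headers:

    /* bf16_round_demo.c -- standalone sketch of the conversion above. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same steps as ggml_compute_fp32_to_bf16, on a raw fp32 bit pattern. */
    static uint16_t fp32_bits_to_bf16_bits(uint32_t i) {
        if ((i & 0x7fffffff) > 0x7f800000)  /* nan: keep it, set quiet bit */
            return (uint16_t)((i >> 16) | 64);
        if (!(i & 0x7f800000))              /* subnormal: flush to signed zero */
            return (uint16_t)((i & 0x80000000) >> 16);
        /* round to nearest, ties to even: add 0x7fff plus the lowest kept
         * bit, then drop the low 16 bits */
        return (uint16_t)((i + (0x7fff + ((i >> 16) & 1))) >> 16);
    }

    int main(void) {
        /* 1.0f is 0x3f800000; its low 16 bits are zero, so it is exact. */
        assert(fp32_bits_to_bf16_bits(0x3f800000) == 0x3f80);

        /* 0x3f808000 sits exactly halfway between encodings 0x3f80 and
         * 0x3f81; the tie goes to the even one. One ulp above the halfway
         * point rounds up. */
        assert(fp32_bits_to_bf16_bits(0x3f808000) == 0x3f80);
        assert(fp32_bits_to_bf16_bits(0x3f808001) == 0x3f81);

        /* Subnormals flush to zero, keeping only the sign. */
        assert(fp32_bits_to_bf16_bits(0x00000001) == 0x0000);
        assert(fp32_bits_to_bf16_bits(0x80000001) == 0x8000);

        /* Decoding is a pure left shift back into the high half. */
        float f;
        uint32_t u = (uint32_t)0x3f80 << 16;
        memcpy(&f, &u, sizeof f);
        assert(f == 1.0f);

        puts("ok");
        return 0;
    }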
@@ -518,9 +602,6 @@ size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml
 // return index, asserts if table is full
 size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
-#define GGML_FP32_TO_BF16(x) ggml_fp32_to_bf16(x)
-#define GGML_BF16_TO_FP32(x) ggml_bf16_to_fp32(x)
-
 #ifdef __cplusplus
 }
 #endif
ggml.c | 20

@@ -339,16 +339,26 @@ GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
     return "GGML status: unknown";
 }
 
+// note: do not use these inside ggml.c
+// these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
+#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
     return GGML_FP16_TO_FP32(x);
 }
 
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
+#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
     return GGML_FP32_TO_FP16(x);
 }
 
+float ggml_bf16_to_fp32(ggml_bf16_t x) {
+#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
+    return GGML_BF16_TO_FP32(x);  // it just left shifts
+}
+
+ggml_bf16_t ggml_fp32_to_bf16(float x) {
+#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
+    return GGML_FP32_TO_BF16(x);
+}
+
 void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
     for (int64_t i = 0; i < n; i++) {
         y[i] = GGML_FP16_TO_FP32(x[i]);
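The #define placed immediately after each wrapper's opening brace is deliberate: from that point on, any use of the function's own name inside ggml.c expands to an undeclared do_not_use__* identifier and the build fails, while internal code keeps going through the GGML_FP16_TO_FP32-style macros that compile down to the inline implementations. A minimal sketch of the trick (a hypothetical file with a made-up function name, not from the commit):

    /* poison_demo.c -- self-poisoning a function's name after defining it. */
    #include <stdio.h>

    int add_once(int a, int b) {
    /* The definition above is unaffected, because the macro is introduced
     * after the function's name has already been parsed. Every later use of
     * the name in this translation unit expands to an undeclared identifier. */
    #define add_once do_not_use__add_once__here
        return a + b;
    }

    int main(void) {
        /* Uncommenting the next line breaks the build: the call expands to
         * do_not_use__add_once__here(1, 2), which is not declared anywhere. */
        /* printf("%d\n", add_once(1, 2)); */
        printf("%d\n", 3);
        return 0;
    }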
@@ -374,8 +384,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
     }
 }
 
-void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
-    int i = 0;
+void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
+    int64_t i = 0;
 #if defined(__AVX512F__)
     for (; i + 16 <= n; i += 16) {
         _mm512_storeu_ps(y + i,
@@ -402,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
     }
 }
 
-void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n) {
+void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
     for (; i + 32 <= n; i += 32) {
ggml.h | 92

@@ -335,6 +335,14 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
 
+    // bfloat16
+    struct ggml_bf16_s;
+    typedef struct ggml_bf16_s ggml_bf16_t;
+    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
+    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
+    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
+
     struct ggml_object;
     struct ggml_context;
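Note that ggml.h now only forward-declares struct ggml_bf16_s; the layout stays private to ggml-impl.h, and ggml.h users handle bf16 data through opaque pointers and the row converters. A hypothetical usage sketch (file name, buffer sizes, and include path are assumptions, as is the 16-bit element size, which ggml.h deliberately does not expose):

    /* bf16_rows.c -- hypothetical consumer, linked against ggml. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "ggml.h"

    int main(void) {
        enum { N = 4 };
        float in[N]  = { 1.0f, 0.5f, 3.14159f, -2.0f };
        float out[N];

        /* ggml_bf16_t is an incomplete type here, so we cannot declare an
         * array of it; we allocate raw storage (assuming 16 bits per
         * element, per the definition in ggml-impl.h) and pass it through
         * the opaque pointer type. */
        ggml_bf16_t * tmp = malloc(N * sizeof(uint16_t));
        if (!tmp) return 1;

        ggml_fp32_to_bf16_row(in, tmp, N);   /* fp32 -> bf16, round to nearest even */
        ggml_bf16_to_fp32_row(tmp, out, N);  /* bf16 -> fp32, exact left shift */

        for (int i = 0; i < N; i++) {
            /* 3.14159f comes back as 3.140625: the low mantissa bits are gone. */
            printf("%g -> %g\n", in[i], out[i]);
        }
        free(tmp);
        return 0;
    }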
@@ -2392,90 +2400,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_vsx        (void);
     GGML_API int ggml_cpu_has_matmul_int8(void);
 
-    /**
-     * Google Brain 16-bit floating point number.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───┐
-     *     0b0000000000000000 brain16
-     *
-     * Since bf16 has the same number of exponent bits as a 32bit float,
-     * encoding and decoding numbers becomes relatively straightforward.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───────────────────┐
-     *     0b00000000000000000000000000000000 IEEE binary32
-     *
-     * For comparison, the standard fp16 format has fewer exponent bits.
-     *
-     *       ┌sign
-     *       │
-     *       │  ┌exponent
-     *       │  │
-     *       │  │    ┌mantissa
-     *       │  │    │
-     *       │┌─┴─┐┌─┴──────┐
-     *     0b0000000000000000 IEEE binary16
-     *
-     * So be warned that converting between them, destroys several bits.
-     *
-     * @see IEEE 754-2008
-     */
-    typedef struct {
-        uint16_t x;
-    } ggml_bf16_t;
-
-    /**
-     * Converts brain16 to float32.
-     */
-    static inline float ggml_bf16_to_fp32(ggml_bf16_t h) {
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.i = (uint32_t)h.x << 16;
-        return u.f;
-    }
-
-    /**
-     * Converts float32 to brain16.
-     *
-     * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
-     * Subnormals shall be flushed to zero, and NANs will be quiet.
-     * This code should vectorize nicely if using modern compilers.
-     */
-    static inline ggml_bf16_t ggml_fp32_to_bf16(float s) {
-        ggml_bf16_t h;
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.f = s;
-        if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
-            h.x = (u.i >> 16) | 64; /* force to quiet */
-            return h;
-        }
-        if (!(u.i & 0x7f800000)) { /* subnormal */
-            h.x = (u.i & 0x80000000) >> 16; /* flush to zero */
-            return h;
-        }
-        h.x = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
-        return h;
-    }
-
-    GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n);
-    GGML_API void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n);
-
     //
     // Internal types and functions exposed for tests and benchmarks
     //
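The net effect for code outside ggml: the bf16 helpers are no longer header-only inlines, so consumers call the exported symbols instead of instantiating the conversion themselves. A hypothetical before/after sketch (consumer code with a made-up function name, not from the commit):

    #include <stdint.h>
    #include "ggml.h"

    /* Before this commit, ggml.h itself defined
     *     static inline ggml_bf16_t ggml_fp32_to_bf16(float);
     * so a consumer could convert with no ggml symbols needed at link time.
     * After it, ggml_bf16_t is opaque in ggml.h and the conversions are
     * exported functions, so a portable consumer works through the row API
     * and links against ggml: */
    void to_bf16(const float * src, ggml_bf16_t * dst, int64_t n) {
        ggml_fp32_to_bf16_row(src, dst, n);  /* one exported call per row */
    }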