Minimize the GGML API surface area for BF16
commit 180bfcd8d5 (parent 823d45ad71)

3 changed files with 107 additions and 92 deletions
ggml-impl.h | 87

@@ -17,6 +17,90 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/**
+ * Google Brain 16-bit floating point number.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───┐
+ *     0b0000000000000000 brain16
+ *
+ * Since bf16 has the same number of exponent bits as a 32bit float,
+ * encoding and decoding numbers becomes relatively straightforward.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───────────────────┐
+ *     0b00000000000000000000000000000000 IEEE binary32
+ *
+ * For comparison, the standard fp16 format has fewer exponent bits.
+ *
+ *       ┌sign
+ *       │
+ *       │  ┌exponent
+ *       │  │
+ *       │  │    ┌mantissa
+ *       │  │    │
+ *       │┌─┴─┐┌─┴──────┐
+ *     0b0000000000000000 IEEE binary16
+ *
+ * So be warned that converting between them destroys several bits.
+ *
+ * @see IEEE 754-2008
+ */
+struct ggml_bf16_s {
+    uint16_t bits;
+};
+
+/**
+ * Converts brain16 to float32.
+ */
+static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = (uint32_t)h.bits << 16;
+    return u.f;
+}
+
+/**
+ * Converts float32 to brain16.
+ *
+ * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
+ * Subnormals shall be flushed to zero, and NANs will be quiet.
+ * This code should vectorize nicely if using modern compilers.
+ */
+static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
+    ggml_bf16_t h;
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = s;
+    if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
+        h.bits = (u.i >> 16) | 64; /* force to quiet */
+        return h;
+    }
+    if (!(u.i & 0x7f800000)) { /* subnormal */
+        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
+        return h;
+    }
+    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
+    return h;
+}
+
+#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
+#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
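The final rounding line adds 0x7fff plus the lowest kept bit before truncating, which implements round-to-nearest with ties-to-even. A standalone sketch (a hypothetical test file, not part of the commit) that mirrors the logic of ggml_compute_fp32_to_bf16 on raw bit patterns, without depending on the ggml headers:

    /* bf16_round_demo.c -- standalone sketch of the conversion above. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same steps as ggml_compute_fp32_to_bf16, on a raw fp32 bit pattern. */
    static uint16_t fp32_bits_to_bf16_bits(uint32_t i) {
        if ((i & 0x7fffffff) > 0x7f800000)  /* nan: keep it, set quiet bit */
            return (uint16_t)((i >> 16) | 64);
        if (!(i & 0x7f800000))              /* subnormal: flush to signed zero */
            return (uint16_t)((i & 0x80000000) >> 16);
        /* round to nearest, ties to even: add 0x7fff plus the lowest kept
         * bit, then drop the low 16 bits */
        return (uint16_t)((i + (0x7fff + ((i >> 16) & 1))) >> 16);
    }

    int main(void) {
        /* 1.0f is 0x3f800000; its low 16 bits are zero, so it is exact. */
        assert(fp32_bits_to_bf16_bits(0x3f800000) == 0x3f80);

        /* 0x3f808000 sits exactly halfway between encodings 0x3f80 and
         * 0x3f81; the tie goes to the even one. One ulp above the halfway
         * point rounds up. */
        assert(fp32_bits_to_bf16_bits(0x3f808000) == 0x3f80);
        assert(fp32_bits_to_bf16_bits(0x3f808001) == 0x3f81);

        /* Subnormals flush to zero, keeping only the sign. */
        assert(fp32_bits_to_bf16_bits(0x00000001) == 0x0000);
        assert(fp32_bits_to_bf16_bits(0x80000001) == 0x8000);

        /* Decoding is a pure left shift back into the high half. */
        float f;
        uint32_t u = (uint32_t)0x3f80 << 16;
        memcpy(&f, &u, sizeof f);
        assert(f == 1.0f);

        puts("ok");
        return 0;
    }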
@@ -518,9 +602,6 @@ size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml
 // return index, asserts if table is full
 size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
-#define GGML_FP32_TO_BF16(x) ggml_fp32_to_bf16(x)
-#define GGML_BF16_TO_FP32(x) ggml_bf16_to_fp32(x)
-
 #ifdef __cplusplus
 }
 #endif
ggml.c | 20

@@ -339,16 +339,26 @@ GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
     return "GGML status: unknown";
 }
 
+// note: do not use these inside ggml.c
+// these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
+#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
     return GGML_FP16_TO_FP32(x);
 }
 
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
+#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
     return GGML_FP32_TO_FP16(x);
 }
 
+float ggml_bf16_to_fp32(ggml_bf16_t x) {
+#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
+    return GGML_BF16_TO_FP32(x);  // it just left shifts
+}
+
+ggml_bf16_t ggml_fp32_to_bf16(float x) {
+#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
+    return GGML_FP32_TO_BF16(x);
+}
+
 void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
     for (int64_t i = 0; i < n; i++) {
         y[i] = GGML_FP16_TO_FP32(x[i]);
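The #define placed immediately after each wrapper's opening brace is deliberate: from that point on, any use of the function's own name inside ggml.c expands to an undeclared do_not_use__* identifier and the build fails, while internal code keeps going through the GGML_FP16_TO_FP32-style macros that compile down to the inline implementations. A minimal sketch of the trick (a hypothetical file with a made-up function name, not from the commit):

    /* poison_demo.c -- self-poisoning a function's name after defining it. */
    #include <stdio.h>

    int add_once(int a, int b) {
    /* The definition above is unaffected, because the macro is introduced
     * after the function's name has already been parsed. Every later use of
     * the name in this translation unit expands to an undeclared identifier. */
    #define add_once do_not_use__add_once__here
        return a + b;
    }

    int main(void) {
        /* Uncommenting the next line breaks the build: the call expands to
         * do_not_use__add_once__here(1, 2), which is not declared anywhere. */
        /* printf("%d\n", add_once(1, 2)); */
        printf("%d\n", 3);
        return 0;
    }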
@@ -374,8 +384,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
     }
 }
 
-void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
-    int i = 0;
+void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
+    int64_t i = 0;
 #if defined(__AVX512F__)
     for (; i + 16 <= n; i += 16) {
         _mm512_storeu_ps(y + i,
@@ -402,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
     }
 }
 
-void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n) {
+void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
     for (; i + 32 <= n; i += 32) {
ggml.h | 92

@@ -335,6 +335,14 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
 
+    // bfloat16
+    struct ggml_bf16_s;
+    typedef struct ggml_bf16_s ggml_bf16_t;
+    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
+    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
+    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
+
     struct ggml_object;
     struct ggml_context;
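Note that ggml.h now only forward-declares struct ggml_bf16_s; the layout stays private to ggml-impl.h, and ggml.h users handle bf16 data through opaque pointers and the row converters. A hypothetical usage sketch (file name, buffer sizes, and include path are assumptions, as is the 16-bit element size, which ggml.h deliberately does not expose):

    /* bf16_rows.c -- hypothetical consumer, linked against ggml. */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "ggml.h"

    int main(void) {
        enum { N = 4 };
        float in[N]  = { 1.0f, 0.5f, 3.14159f, -2.0f };
        float out[N];

        /* ggml_bf16_t is an incomplete type here, so we cannot declare an
         * array of it; we allocate raw storage (assuming 16 bits per
         * element, per the definition in ggml-impl.h) and pass it through
         * the opaque pointer type. */
        ggml_bf16_t * tmp = malloc(N * sizeof(uint16_t));
        if (!tmp) return 1;

        ggml_fp32_to_bf16_row(in, tmp, N);   /* fp32 -> bf16, round to nearest even */
        ggml_bf16_to_fp32_row(tmp, out, N);  /* bf16 -> fp32, exact left shift */

        for (int i = 0; i < N; i++) {
            /* 3.14159f comes back as 3.140625: the low mantissa bits are gone. */
            printf("%g -> %g\n", in[i], out[i]);
        }
        free(tmp);
        return 0;
    }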
@@ -2392,90 +2400,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_vsx        (void);
     GGML_API int ggml_cpu_has_matmul_int8(void);
 
-    /**
-     * Google Brain 16-bit floating point number.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───┐
-     *     0b0000000000000000 brain16
-     *
-     * Since bf16 has the same number of exponent bits as a 32bit float,
-     * encoding and decoding numbers becomes relatively straightforward.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───────────────────┐
-     *     0b00000000000000000000000000000000 IEEE binary32
-     *
-     * For comparison, the standard fp16 format has fewer exponent bits.
-     *
-     *       ┌sign
-     *       │
-     *       │  ┌exponent
-     *       │  │
-     *       │  │    ┌mantissa
-     *       │  │    │
-     *       │┌─┴─┐┌─┴──────┐
-     *     0b0000000000000000 IEEE binary16
-     *
-     * So be warned that converting between them, destroys several bits.
-     *
-     * @see IEEE 754-2008
-     */
-    typedef struct {
-        uint16_t x;
-    } ggml_bf16_t;
-
-    /**
-     * Converts brain16 to float32.
-     */
-    static inline float ggml_bf16_to_fp32(ggml_bf16_t h) {
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.i = (uint32_t)h.x << 16;
-        return u.f;
-    }
-
-    /**
-     * Converts float32 to brain16.
-     *
-     * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
-     * Subnormals shall be flushed to zero, and NANs will be quiet.
-     * This code should vectorize nicely if using modern compilers.
-     */
-    static inline ggml_bf16_t ggml_fp32_to_bf16(float s) {
-        ggml_bf16_t h;
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.f = s;
-        if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
-            h.x = (u.i >> 16) | 64; /* force to quiet */
-            return h;
-        }
-        if (!(u.i & 0x7f800000)) { /* subnormal */
-            h.x = (u.i & 0x80000000) >> 16; /* flush to zero */
-            return h;
-        }
-        h.x = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
-        return h;
-    }
-
-    GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n);
-    GGML_API void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n);
-
     //
     // Internal types and functions exposed for tests and benchmarks
     //
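The net effect for code outside ggml: the bf16 helpers are no longer header-only inlines, so consumers call the exported symbols instead of instantiating the conversion themselves. A hypothetical before/after sketch (consumer code with a made-up function name, not from the commit):

    #include <stdint.h>
    #include "ggml.h"

    /* Before this commit, ggml.h itself defined
     *     static inline ggml_bf16_t ggml_fp32_to_bf16(float);
     * so a consumer could convert with no ggml symbols needed at link time.
     * After it, ggml_bf16_t is opaque in ggml.h and the conversions are
     * exported functions, so a portable consumer works through the row API
     * and links against ggml: */
    void to_bf16(const float * src, ggml_bf16_t * dst, int64_t n) {
        ggml_fp32_to_bf16_row(src, dst, n);  /* one exported call per row */
    }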