Fix documentation
This commit is contained in:
parent
ce0442d7a7
commit
bc278c8a0e
1 changed file with 3 additions and 10 deletions
13
ggml-impl.h
13
ggml-impl.h
|
@ -18,7 +18,9 @@
|
|||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
/**
|
||||
* Google Brain 16-bit floating point number.
|
||||
* Converts brain16 to float32.
|
||||
*
|
||||
* The bfloat16 floating point format has the following structure:
|
||||
*
|
||||
* ┌sign
|
||||
* │
|
||||
|
@ -52,17 +54,8 @@
|
|||
* │┌─┴─┐┌─┴──────┐
|
||||
* 0b0000000000000000 IEEE binary16
|
||||
*
|
||||
* So be warned that converting between them destroys several bits.
|
||||
*
|
||||
* @see IEEE 754-2008
|
||||
*/
|
||||
struct ggml_bf16_s {
|
||||
uint16_t bits;
|
||||
};
|
||||
|
||||
/**
|
||||
* Converts brain16 to float32.
|
||||
*/
|
||||
static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
|
||||
union {
|
||||
float f;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue