diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index b55fd1b8b..1701b1538 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -346,7 +346,6 @@ extern "C" {
 
     // google brain half-precision bfloat16
     typedef struct { uint16_t bits; } ggml_bf16_t;
-    GGML_API ggml_bf16_t ggml_make_bf16(uint16_t val);
     GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
     GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
     GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
index 3f7963f66..c2ead2716 100644
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@@ -102,6 +102,15 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
     return h;
 }
 
+// Build a bf16 value from a raw 16-bit pattern: h is stored in .bits as-is,
+// with no float conversion. Plain member assignment is used instead of a
+// union pun, which would be undefined if this header is compiled as C++.
+static inline ggml_bf16_t ggml_make_bf16(uint16_t h) {
+    ggml_bf16_t bf16_value;
+    bf16_value.bits = h;
+    return bf16_value;
+}
+
 #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
 #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 506fc4ed5..642ca7333 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -428,12 +428,6 @@ float ggml_bf16_to_fp32(ggml_bf16_t x) {
     return GGML_BF16_TO_FP32(x);  // it just left shifts
 }
 
-ggml_bf16_t ggml_make_bf16(uint16_t x) {
-    ggml_bf16_t bf16_value;
-    bf16_value.bits = x;
-    return bf16_value;
-}
-
 ggml_bf16_t ggml_fp32_to_bf16(float x) {
 #define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
     return GGML_FP32_TO_BF16(x);