diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 924058fd8..af1e6272e 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -1,7 +1,5 @@
 #include "ggml.h"
-#include "ggml_internal.h"
 #include "llama.h"
-#include "llama_internal.h"
 
 #include <algorithm>
 #include <cassert>
diff --git a/ggml.c b/ggml.c
index de986e591..7b017f8e7 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2,7 +2,6 @@
 #define _GNU_SOURCE
 
 #include "ggml.h"
-#include "ggml_internal.h"
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
diff --git a/ggml.h b/ggml.h
index ad962b109..f2567d7da 100644
--- a/ggml.h
+++ b/ggml.h
@@ -773,6 +773,30 @@ int ggml_cpu_has_blas(void);
 int ggml_cpu_has_sse3(void);
 int ggml_cpu_has_vsx(void);
 
+
+//
+// Internal types and functions exposed for tests and benchmarks
+//
+
+#ifdef  __cplusplus
+// `restrict` is a C keyword and not standard C++, so GGML_RESTRICT expands to nothing when this header is compiled as C++
+#define GGML_RESTRICT
+#else
+#define GGML_RESTRICT restrict
+#endif
+typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); // reads opaque data x, writes floats to y; k is presumably an element count (TODO confirm in ggml.c)
+typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); // reads floats from x, writes opaque data to y; k is presumably an element count (TODO confirm in ggml.c)
+typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y); // result goes out through s; x and y are opaque inputs, n presumably their length (TODO confirm)
+
+typedef struct { // bundle of function pointers; this is the type returned by ggml_internal_get_quantize_fn
+    dequantize_row_q_t dequantize_row_q; // opaque data -> float, per the typedef's signature
+    quantize_row_q_t   quantize_row_q; // float -> opaque data, per the typedef's signature
+    quantize_row_q_t   quantize_row_q_reference; // same signature as quantize_row_q; presumably a reference implementation (TODO confirm in ggml.c)
+    vec_dot_q_t        vec_dot_q; // presumably a dot product over two opaque buffers (TODO confirm in ggml.c)
+} quantize_fns_t;
+
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i); // returns the table by value; NOTE(review): meaning and valid range of i are not visible in this header
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/ggml_internal.h b/ggml_internal.h
deleted file mode 100644
index 6bfa441d5..000000000
--- a/ggml_internal.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#pragma once
-
-// Internal functions exposed for tests and benchmarks
-
-#ifdef  __cplusplus
-// restrict not standard in C++
-#define restrict
-extern "C" {
-#endif
-
-typedef void (*dequantize_row_q_t)(const void * restrict x, float * restrict y, int k);
-typedef void (*quantize_row_q_t)(const float * restrict x, void * restrict y, int k);
-typedef void (*vec_dot_q_t)(const int n, float * restrict s, const void * restrict x, const void * restrict y);
-
-typedef struct {
-    dequantize_row_q_t dequantize_row_q;
-    quantize_row_q_t   quantize_row_q;
-    quantize_row_q_t   quantize_row_q_reference;
-    vec_dot_q_t        vec_dot_q;
-} quantize_fns_t;
-
-quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
-
-#ifdef  __cplusplus
-}
-#endif
diff --git a/llama.cpp b/llama.cpp
index b736dd88a..bc1f01011 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,5 +1,4 @@
 #include "llama.h"
-#include "llama_internal.h"
 
 #include "ggml.h"
 
diff --git a/llama.h b/llama.h
index 04e2bf71c..deb09fe53 100644
--- a/llama.h
+++ b/llama.h
@@ -164,6 +164,13 @@ extern "C" {
 
 #ifdef __cplusplus
 }
+
+#include <string>
+#include <unordered_map>
+//
+// Internal function exposed for tests and benchmarks; C++ only (declared under #ifdef __cplusplus because it uses std:: types)
+//
+std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx); // returns a non-const reference to a string-keyed map of tensor pointers
 #endif
 
 #endif
diff --git a/llama_internal.h b/llama_internal.h
deleted file mode 100644
index 25c8c2c87..000000000
--- a/llama_internal.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef LLAMA_INTERNAL_H
-#define LLAMA_INTERNAL_H
-
-// Internal functions exposed for tests and benchmarks
-
-#include "ggml.h"
-
-#include <string>
-#include <unordered_map>
-
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx);
-
-#endif