diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 924058fd8..af1e6272e 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -1,7 +1,5 @@ #include "ggml.h" -#include "ggml_internal.h" #include "llama.h" -#include "llama_internal.h" #include #include diff --git a/ggml.c b/ggml.c index de986e591..7b017f8e7 100644 --- a/ggml.c +++ b/ggml.c @@ -2,7 +2,6 @@ #define _GNU_SOURCE #include "ggml.h" -#include "ggml_internal.h" #if defined(_MSC_VER) || defined(__MINGW32__) #include // using malloc.h with MSC/MINGW diff --git a/ggml.h b/ggml.h index ad962b109..f2567d7da 100644 --- a/ggml.h +++ b/ggml.h @@ -773,6 +773,30 @@ int ggml_cpu_has_blas(void); int ggml_cpu_has_sse3(void); int ggml_cpu_has_vsx(void); + +// +// Internal types and functions exposed for tests and benchmarks +// + +#ifdef __cplusplus +// restrict not standard in C++ +#define GGML_RESTRICT +#else +#define GGML_RESTRICT restrict +#endif +typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k); +typedef void (*quantize_row_q_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k); +typedef void (*vec_dot_q_t)(const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y); + +typedef struct { + dequantize_row_q_t dequantize_row_q; + quantize_row_q_t quantize_row_q; + quantize_row_q_t quantize_row_q_reference; + vec_dot_q_t vec_dot_q; +} quantize_fns_t; + +quantize_fns_t ggml_internal_get_quantize_fn(size_t i); + #ifdef __cplusplus } #endif diff --git a/ggml_internal.h b/ggml_internal.h deleted file mode 100644 index 6bfa441d5..000000000 --- a/ggml_internal.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -// Internal functions exposed for tests and benchmarks - -#ifdef __cplusplus -// restrict not standard in C++ -#define restrict -extern "C" { -#endif - -typedef void (*dequantize_row_q_t)(const void * restrict x, float * restrict y, int k); -typedef void (*quantize_row_q_t)(const float * restrict x, void * restrict y, int k); -typedef void (*vec_dot_q_t)(const int n, float * restrict s, const void * restrict x, const void * restrict y); - -typedef struct { - dequantize_row_q_t dequantize_row_q; - quantize_row_q_t quantize_row_q; - quantize_row_q_t quantize_row_q_reference; - vec_dot_q_t vec_dot_q; -} quantize_fns_t; - -quantize_fns_t ggml_internal_get_quantize_fn(size_t i); - -#ifdef __cplusplus -} -#endif diff --git a/llama.cpp b/llama.cpp index b736dd88a..bc1f01011 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1,5 +1,4 @@ #include "llama.h" -#include "llama_internal.h" #include "ggml.h" diff --git a/llama.h b/llama.h index 04e2bf71c..deb09fe53 100644 --- a/llama.h +++ b/llama.h @@ -164,6 +164,13 @@ extern "C" { #ifdef __cplusplus } + +#include +#include +// +// Internal function exposed for tests and benchmarks +// +std::unordered_map& llama_internal_get_tensor_map(struct llama_context * ctx); #endif #endif diff --git a/llama_internal.h b/llama_internal.h deleted file mode 100644 index 25c8c2c87..000000000 --- a/llama_internal.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef LLAMA_INTERNAL_H -#define LLAMA_INTERNAL_H - -// Internal functions exposed for tests and benchmarks - -#include "ggml.h" - -#include -#include - -std::unordered_map& llama_internal_get_tensor_map(struct llama_context * ctx); - -#endif