diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 955e7f077..e0ae6cf70 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -85,6 +85,15 @@ public:
 
 static kompute_manager komputeManager;
 
+struct ggml_vk_memory { // one allocation record: a data pointer, a size, and primary/staging Vulkan handles
+    void *data = nullptr; // NOTE(review): presumably the host-visible mapping of this allocation - confirm in ggml_vk_allocate
+    size_t size = 0; // NOTE(review): presumably the allocation size in bytes - confirm in ggml_vk_allocate
+    vk::DeviceMemory *primaryMemory = nullptr; // NOTE(review): presumably the memory backing primaryBuffer - confirm
+    vk::Buffer *primaryBuffer = nullptr; // dereferenced and destroyed through the device in ggml_vk_free_memory
+    vk::DeviceMemory *stagingMemory = nullptr; // NOTE(review): presumably the memory backing stagingBuffer; may stay null - confirm
+    vk::Buffer *stagingBuffer = nullptr; // NOTE(review): presumably an optional transfer buffer; may stay null - confirm
+};
+
 #ifdef __linux__
 __attribute__((constructor))
 static void enable_sam() {
@@ -302,13 +311,13 @@ ggml_vk_device ggml_vk_current_device() {
     return devices.front();
 }
 
-ggml_kompute_context *ggml_vk_init() {
+static ggml_kompute_context * ggml_vk_init() { // allocates the global s_kompute_context and returns it; asserts that none exists yet
     GGML_ASSERT(s_kompute_context == nullptr);
     s_kompute_context = new ggml_kompute_context;
     return s_kompute_context;
 }
 
-void ggml_vk_free(struct ggml_kompute_context * ctx) {
+static void ggml_vk_free(struct ggml_kompute_context * ctx) {
     assert(ctx == s_kompute_context);
     s_kompute_context = nullptr;
     if (ctx != nullptr) {
@@ -457,7 +466,7 @@ static ggml_vk_memory ggml_vk_allocate(size_t size) {
     return memory;
 }
 
-void ggml_vk_free_memory(ggml_vk_memory &memory)
+static void ggml_vk_free_memory(ggml_vk_memory &memory)
 {
     komputeManager()->device()->destroy(
       *memory.primaryBuffer,
@@ -1376,7 +1385,7 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) {
     return false;
 }
 
-void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
+static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) {
     const int n_seq = 8;
 
     // FIXME: Figure out if we can somehow optimize the size of the pool... right now we're setting
diff --git a/ggml-kompute.h b/ggml-kompute.h
index 288c835c5..63048213f 100644
--- a/ggml-kompute.h
+++ b/ggml-kompute.h
@@ -6,22 +6,6 @@
 #include <vector>
 #include <string>
 
-struct ggml_kompute_context;
-
-namespace vk {
-    class DeviceMemory;
-    class Buffer;
-};
-
-struct ggml_vk_memory {
-    void *data = nullptr;
-    size_t size = 0;
-    vk::DeviceMemory *primaryMemory = nullptr;
-    vk::Buffer *primaryBuffer = nullptr;
-    vk::DeviceMemory *stagingMemory = nullptr;
-    vk::Buffer *stagingBuffer = nullptr;
-};
-
 struct ggml_vk_device {
     int index = 0;
     int type = 0;           // same as VkPhysicalDeviceType
@@ -40,11 +24,6 @@ bool ggml_vk_has_vulkan();
 bool ggml_vk_has_device();
 bool ggml_vk_using_vulkan();
 ggml_vk_device ggml_vk_current_device();
-struct ggml_kompute_context * ggml_vk_init(void);
-void ggml_vk_free(struct ggml_kompute_context * ctx);
-void ggml_vk_free_memory(ggml_vk_memory &memory);
-
-void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
 
 //
 // backend API