From 5ac68ccacb7b86037a402a8792ffa1fb102f4394 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 11:14:21 +0200
Subject: [PATCH] Cleanups

---
 ggml-vulkan.h |  2 --
 ggml.c        |  6 ++----
 llama.cpp     | 12 ++----------
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/ggml-vulkan.h b/ggml-vulkan.h
index 5ec392782..361d8b5e2 100644
--- a/ggml-vulkan.h
+++ b/ggml-vulkan.h
@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 
-void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
-void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
 void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
 
 #ifdef __cplusplus
diff --git a/ggml.c b/ggml.c
index 34f087000..416a20576 100644
--- a/ggml.c
+++ b/ggml.c
@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_KOMPUTE)
-#include "ggml-vulkan.h"
 #elif defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_0,
         .quantize_row_q           = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
         .quantize_row_q_dot       = quantize_row_q8_0,
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
         .vec_dot_type             = GGML_TYPE_Q8_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_1,
         .quantize_row_q           = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
         .quantize_row_q_dot       = quantize_row_q8_1,
diff --git a/llama.cpp b/llama.cpp
index 740726445..40e3a4a7d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -753,7 +753,7 @@ struct llama_model_loader {
         }
     }
 
-    void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         size_t prefetch_size = 0;
         size_t lock_size = 0;
@@ -810,14 +810,6 @@ struct llama_model_loader {
                         free(lt.data);
                     }
                     break;
-#elif defined(GGML_USE_KOMPUTE)
-                case GGML_BACKEND_GPU:
-                    lt.ggml_tensor->data = lt.data;
-                    ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
-                    if (!use_mmap) {
-                        free(lt.data);
-                    }
-                    break;
 #endif
                 default:
                     continue;
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
     }
 #endif
 
-    ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
+    ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     if (progress_callback) {
         progress_callback(1.0f, progress_callback_user_data);