Cleanups
parent de7d1823ed
commit 5ac68ccacb

3 changed files with 4 additions and 16 deletions
2 ggml-vulkan.h

@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 
-void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
-void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
 void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
 
 #ifdef __cplusplus
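The header hunk drops the device-side dequantize declarations, since ggml.c switches back to the CPU dequantizers (see the ggml.c hunks below). For orientation, a minimal sketch of how the surviving transfer entry points fit together; the variable names (ctx, gf, input, output) are assumptions based only on the signatures above, not code from this commit:

// Hedged sketch (assumed usage, not from this commit): copy inputs to the
// device, run the graph on the Kompute backend, then read results back.
ggml_vk_h2d_tensor(ctx, input);    // host -> device upload
ggml_vk_graph_compute(ctx, gf);    // execute the ggml_cgraph
ggml_vk_d2h_tensor(ctx, output);   // device -> host readback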
6 ggml.c

@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_KOMPUTE)
-#include "ggml-vulkan.h"
 #elif defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
+        .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_0,
         .quantize_row_q = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
         .quantize_row_q_dot = quantize_row_q8_0,
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
+        .dequantize_row_q = (dequantize_row_q_t) dequantize_row_q4_1,
         .quantize_row_q = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
         .quantize_row_q_dot = quantize_row_q8_1,
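Both ggml.c table hunks retarget dequantize_row_q for Q4_0/Q4_1 from the removed ggml_vk_* functions back to the CPU implementations, which is what makes the #include "ggml-vulkan.h" in the first hunk unnecessary. A minimal sketch of the dispatch this table drives; the helper below is illustrative, not part of ggml:

// Illustrative helper (not in ggml): dispatch dequantization through the
// quantize_fns table. After this commit the Q4_0/Q4_1 entries resolve to
// the CPU dequantize_row_q4_0 / dequantize_row_q4_1.
static void dequantize_row(enum ggml_type type, const void * x, float * y, int k) {
    quantize_fns[type].dequantize_row_q(x, y, k);
}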
12 llama.cpp

@@ -753,7 +753,7 @@ struct llama_model_loader {
         }
     }
 
-    void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         size_t prefetch_size = 0;
         size_t lock_size = 0;
@@ -810,14 +810,6 @@ struct llama_model_loader {
                     free(lt.data);
                 }
                 break;
-#elif defined(GGML_USE_KOMPUTE)
-            case GGML_BACKEND_GPU:
-                lt.ggml_tensor->data = lt.data;
-                ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
-                if (!use_mmap) {
-                    free(lt.data);
-                }
-                break;
 #endif
             default:
                 continue;
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
     }
 #endif
 
-    ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
+    ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     if (progress_callback) {
         progress_callback(1.0f, progress_callback_user_data);
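Removing the GGML_USE_KOMPUTE branch deletes the only use of the llama_context inside load_all_data (it was needed to reach lctx.ctx_kompute for ggml_vk_h2d_tensor), which is why the lctx parameter disappears from both the signature and the call site. A rough sketch of the per-tensor placement loop that remains, with assumed surrounding code rather than verbatim lines from this commit:

// Rough sketch (assumed context): the tensor-placement loop after this
// commit. The CPU case aliases the (possibly mmapped) file data; other
// backends keep their own #ifdef'd cases.
for (llama_load_tensor & lt : tensors_map.tensors) {
    switch (lt.ggml_tensor->backend) {
        case GGML_BACKEND_CPU:
            lt.ggml_tensor->data = lt.data;  // point the tensor at mapped file memory
            break;
        default:
            continue;                        // non-CPU tensors handled elsewhere
    }
}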