From 5ac68ccacb7b86037a402a8792ffa1fb102f4394 Mon Sep 17 00:00:00 2001
From: niansa
Date: Thu, 29 Jun 2023 11:14:21 +0200
Subject: [PATCH] Cleanups

---
 ggml-vulkan.h |  2 --
 ggml.c        |  6 ++----
 llama.cpp     | 12 ++----------
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/ggml-vulkan.h b/ggml-vulkan.h
index 5ec392782..361d8b5e2 100644
--- a/ggml-vulkan.h
+++ b/ggml-vulkan.h
@@ -32,8 +32,6 @@ bool ggml_vk_add_buffer(
 void ggml_vk_h2d_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 void ggml_vk_d2h_tensor(struct ggml_kompute_context * ctx, struct ggml_tensor * t);
 
-void ggml_vk_dequantize_row_q4_0(const void * x, float * y, int k);
-void ggml_vk_dequantize_row_q4_1(const void * x, float * y, int k);
 void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf);
 
 #ifdef __cplusplus
diff --git a/ggml.c b/ggml.c
index 34f087000..416a20576 100644
--- a/ggml.c
+++ b/ggml.c
@@ -161,8 +161,6 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #include <cblas.h>
-#elif defined(GGML_USE_KOMPUTE)
-#include "ggml-vulkan.h"
 #elif defined(GGML_USE_CUBLAS)
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
@@ -1550,7 +1548,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
 
 static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_0,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_0,
         .quantize_row_q           = quantize_row_q4_0,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
         .quantize_row_q_dot       = quantize_row_q8_0,
@@ -1558,7 +1556,7 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
         .vec_dot_type             = GGML_TYPE_Q8_0,
     },
     [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q         = (dequantize_row_q_t) ggml_vk_dequantize_row_q4_1,
+        .dequantize_row_q         = (dequantize_row_q_t) dequantize_row_q4_1,
         .quantize_row_q           = quantize_row_q4_1,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
         .quantize_row_q_dot       = quantize_row_q8_1,
diff --git a/llama.cpp b/llama.cpp
index 740726445..40e3a4a7d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -753,7 +753,7 @@ struct llama_model_loader {
         }
     }
 
-    void load_all_data(llama_context & lctx, llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         size_t prefetch_size = 0;
         size_t lock_size = 0;
@@ -810,14 +810,6 @@ struct llama_model_loader {
                         free(lt.data);
                     }
                     break;
-#elif defined(GGML_USE_KOMPUTE)
-                case GGML_BACKEND_GPU:
-                    lt.ggml_tensor->data = lt.data;
-                    ggml_vk_h2d_tensor(lctx.ctx_kompute, lt.ggml_tensor);
-                    if (!use_mmap) {
-                        free(lt.data);
-                    }
-                    break;
 #endif
                 default:
                     continue;
@@ -1323,7 +1315,7 @@ static void llama_model_load_internal(
     }
 #endif
 
-    ml->load_all_data(lctx, progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
+    ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     if (progress_callback) {
         progress_callback(1.0f, progress_callback_user_data);