From 82ce1c4da22eea6075a7d80da59880e99fc98d36 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Thu, 25 Jan 2024 19:43:08 +0100 Subject: [PATCH] Cleanup header and other files --- CMakeLists.txt | 2 +- Makefile | 8 +++----- ggml-vulkan.cpp | 26 ++++---------------------- ggml-vulkan.h | 12 ------------ llama.cpp | 2 -- 5 files changed, 8 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3d58d8a0..908c109a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -408,7 +408,7 @@ if (LLAMA_VULKAN) message(STATUS "Vulkan found") add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h) - target_link_libraries(ggml-vulkan PUBLIC Vulkan::Vulkan) + target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan) add_compile_definitions(GGML_USE_VULKAN) diff --git a/Makefile b/Makefile index a702b566a..2467ec73a 100644 --- a/Makefile +++ b/Makefile @@ -449,14 +449,12 @@ ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h endif # LLAMA_CLBLAST ifdef LLAMA_VULKAN - CFLAGS += -DGGML_USE_VULKAN - CXXFLAGS += -DGGML_USE_VULKAN - LDFLAGS += -lvulkan + MK_CPPFLAGS += -DGGML_USE_VULKAN + MK_LDFLAGS += -lvulkan OBJS += ggml-vulkan.o ifdef LLAMA_VULKAN_CHECK_RESULTS - CFLAGS += -DGGML_VULKAN_CHECK_RESULTS - CXXFLAGS += -DGGML_VULKAN_CHECK_RESULTS + MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS endif ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index ccdb1eb2a..ff57fec28 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -1254,7 +1254,7 @@ static vk_buffer ggml_vk_create_buffer_temp(size_t size) { return buf; } -void* ggml_vk_host_malloc(size_t size) { +static void * ggml_vk_host_malloc(size_t size) { #ifdef VK_DEBUG std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl; #endif @@ -1278,7 +1278,7 @@ void* ggml_vk_host_malloc(size_t size) { return buf.ptr; } -void ggml_vk_host_free(void* ptr) { +static void ggml_vk_host_free(void* ptr) { if (ptr == nullptr) { return; } @@ -2500,7 +2500,7 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(vk_context& ctx, const ggml_tensor * } } -bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * dst) { +static bool ggml_vk_can_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * dst) { const uint64_t ne10 = src1->ne[0]; const uint64_t ne0 = dst->ne[0]; @@ -3532,24 +3532,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor) return extra; } -void ggml_vk_prepare_tensor(ggml_tensor * tensor) { -#ifdef VK_DEBUG - std::cerr << "ggml_vk_prepare_tensor(" << tensor << " (" << tensor->name << ", " << ggml_op_name(tensor->op) << "))" << std::endl; -#endif - tensor->backend = GGML_BACKEND_GPU; - - // recursively prepare buffers until a compute tensor is found - if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) { - const ggml_op src0_op = tensor->src[0]->op; - if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW || src0_op == GGML_OP_PERMUTE) { - ggml_vk_prepare_tensor(tensor->src[0]); - } - } - if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) { - ggml_vk_prepare_tensor(tensor->src[1]); - } -} - // TODO: Still needed? static void ggml_vk_tensor_stride_order(const ggml_tensor * tensor, std::array& order) { order = {-1, -1, -1, -1}; @@ -4120,7 +4102,7 @@ void ggml_vk_graph_cleanup() { vk_gc.contexts.clear(); } -void ggml_vk_cleanup() { +static void ggml_vk_cleanup() { #ifdef VK_DEBUG std::cerr << "ggml_vk_cleanup()" << std::endl; #endif diff --git a/ggml-vulkan.h b/ggml-vulkan.h index 128cf7bca..eb8a148e2 100644 --- a/ggml-vulkan.h +++ b/ggml-vulkan.h @@ -16,22 +16,10 @@ GGML_API void ggml_vk_preallocate_buffers(void); GGML_API void ggml_vk_build_graph(struct ggml_tensor * node, bool last_node); GGML_API bool ggml_vk_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor); #ifdef GGML_VULKAN_CHECK_RESULTS -void ggml_vk_check_results_0(struct ggml_compute_params * params, struct ggml_tensor * tensor); void ggml_vk_check_results_1(struct ggml_compute_params * params, struct ggml_tensor * tensor); #endif GGML_API void ggml_vk_graph_cleanup(void); -GGML_API void * ggml_vk_host_malloc(size_t size); -GGML_API void ggml_vk_host_free(void * ptr); - -GGML_API void ggml_vk_transform_tensor_temporary(const void * data, struct ggml_tensor * tensor); -GGML_API void ggml_vk_transform_tensor_static(const void * data, struct ggml_tensor * tensor); -GGML_API void ggml_vk_assign_buffer(struct ggml_tensor * tensor); -GGML_API void ggml_vk_prepare_tensor(struct ggml_tensor * tensor); -GGML_API void ggml_vk_cleanup(void); - -GGML_API bool ggml_vk_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst); - // backend API GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(void); diff --git a/llama.cpp b/llama.cpp index 0fcc30772..0568b8eaa 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1,8 +1,6 @@ #define LLAMA_API_INTERNAL #include "llama.h" -#include - #include "unicode.h" #include "ggml.h"