From 8ad0bb30df71f69a1eb8f1dee642d59e04d1c4ea Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Wed, 27 Nov 2024 18:09:05 -0800 Subject: [PATCH] opencl: integrate backend dyn.load interface and fix compiler and format warnings --- ggml/include/ggml-opencl2.h | 2 -- ggml/src/ggml-opencl2/CMakeLists.txt | 2 +- ggml/src/ggml-opencl2/ggml-opencl2.cpp | 30 +++++++------------ ggml/src/ggml-opencl2/kernels/embed_kernel.py | 30 ++++++++++++------- ggml/src/ggml-opencl2/kernels/ggml-opencl2.cl | 2 +- .../ggml-opencl2/kernels/ggml-opencl2_cvt.cl | 2 +- .../kernels/ggml-opencl2_gemv_noshuffle.cl | 4 +-- .../ggml-opencl2_gemv_noshuffle_general.cl | 4 +-- .../ggml-opencl2/kernels/ggml-opencl2_mm.cl | 2 +- .../kernels/ggml-opencl2_transpose_16.cl | 6 ++-- 10 files changed, 41 insertions(+), 43 deletions(-) diff --git a/ggml/include/ggml-opencl2.h b/ggml/include/ggml-opencl2.h index 5db311968..837c1cc02 100644 --- a/ggml/include/ggml-opencl2.h +++ b/ggml/include/ggml-opencl2.h @@ -30,8 +30,6 @@ GGML_BACKEND_API bool ggml_backend_is_opencl2(ggml_backend_t backend); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_buffer_type(void); GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_host_buffer_type(void); -GGML_BACKEND_API ggml_backend_t ggml_backend_reg_opencl2_init(const char * params, void * user_data); - GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl2_reg(void); #ifdef __cplusplus diff --git a/ggml/src/ggml-opencl2/CMakeLists.txt b/ggml/src/ggml-opencl2/CMakeLists.txt index 474374084..d5cc816a4 100644 --- a/ggml/src/ggml-opencl2/CMakeLists.txt +++ b/ggml/src/ggml-opencl2/CMakeLists.txt @@ -6,7 +6,7 @@ if (OpenCL_FOUND) set(TARGET_NAME ggml-opencl2) - add_library(${TARGET_NAME} + ggml_add_backend_library(${TARGET_NAME} ggml-opencl2.cpp ../../include/ggml-opencl2.h) target_link_libraries(${TARGET_NAME} PRIVATE ggml-base ${OpenCL_LIBRARIES}) diff --git a/ggml/src/ggml-opencl2/ggml-opencl2.cpp b/ggml/src/ggml-opencl2/ggml-opencl2.cpp index ce5c09087..64aa99cff 100644 --- a/ggml/src/ggml-opencl2/ggml-opencl2.cpp +++ b/ggml/src/ggml-opencl2/ggml-opencl2.cpp @@ -1,6 +1,12 @@ // SPDX-FileCopyrightText: Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved // SPDX-License-Identifier: MIT +#define CL_TARGET_OPENCL_VERSION 220 + +// suppress warnings in CL headers for GCC and Clang +#pragma GCC diagnostic ignored "-Wgnu-anonymous-struct" +#pragma GCC diagnostic ignored "-Woverlength-strings" + #include "ggml-opencl2.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -1237,10 +1243,6 @@ static void ggml_backend_opencl2_buffer_init_tensor(ggml_backend_buffer_t buffer tensor->extra = extra; } } - - // This should be removed. Keep it to make it easier to identify the backend - // when debugging until backend is removed from tensor. - tensor->backend = GGML_BACKEND_TYPE_GPU; } // The optimized gemm and gemv kernels are used for large matrices without batch. @@ -1938,18 +1940,7 @@ static struct ggml_backend_device_i ggml_backend_opencl2_device_i = { /* .event_synchronize = */ NULL, }; -// -// Backend registration -// - -GGML_API ggml_backend_t ggml_backend_reg_opencl2_init(const char * params, void * user_data) { - return ggml_backend_opencl2_init(); - - GGML_UNUSED(params); - GGML_UNUSED(user_data); -} - -// new API +// Backend registry static const char * ggml_backend_opencl2_reg_get_name(ggml_backend_reg_t reg) { return "OpenCL2"; @@ -1986,6 +1977,7 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) { if (!initialized) { reg = ggml_backend_reg { + /* .api_version = */ GGML_BACKEND_API_VERSION, /* .iface = */ ggml_backend_opencl2_reg_i, /* .context = */ NULL, }; @@ -2004,6 +1996,8 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) { return ® } +GGML_BACKEND_DL_IMPL(ggml_backend_opencl2_reg) + //------------------------------------------------------------------------------ // Debugging utils //------------------------------------------------------------------------------ @@ -2921,13 +2915,11 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co // init CL objects // <--------------------------------------------> // cl_int status; - cl_event evt; cl_image_format img_fmt_1d; cl_image_desc img_desc_1d; cl_buffer_region region; cl_mem A_image1d; cl_mem B_image1d; - cl_mem A_sub_buffer; cl_mem B_sub_buffer; cl_mem C_d; // for B transpose @@ -3623,7 +3615,7 @@ static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const GGML_ASSERT(src1->extra); // GGML_OP_CPY happens between src0 and src1. - // GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst. + // GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst. UNUSED(dst); const int ne00 = src0 ? src0->ne[0] : 0; diff --git a/ggml/src/ggml-opencl2/kernels/embed_kernel.py b/ggml/src/ggml-opencl2/kernels/embed_kernel.py index 6117014b3..a4b864f2a 100644 --- a/ggml/src/ggml-opencl2/kernels/embed_kernel.py +++ b/ggml/src/ggml-opencl2/kernels/embed_kernel.py @@ -1,19 +1,27 @@ +# + import sys +import logging +logger = logging.getLogger("opencl-embed-kernerl") + def main(): - if len(sys.argv) != 3: - print("Usage: python embed_kernel.py ") - exit(1) + logging.basicConfig(level=logging.INFO) - ifile = open(sys.argv[1], "r") - ofile = open(sys.argv[2], "w") + if len(sys.argv) != 3: + logger.info("Usage: python embed_kernel.py ") + sys.exit(1) - ofile.write("R\"(\n\n") - ofile.write(ifile.read()) - ofile.write("\n)\"") + ifile = open(sys.argv[1], "r") + ofile = open(sys.argv[2], "w") + + ofile.write("R\"(\n\n") + ofile.write(ifile.read()) + ofile.write("\n)\"") + + ifile.close() + ofile.close() - ifile.close() - ofile.close() if __name__ == "__main__": - main() + main() diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2.cl index a29f41be7..e6c25b380 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2.cl @@ -6,7 +6,7 @@ #elif defined(cl_amd_fp16) #pragma OPENCL EXTENSION cl_amd_fp16 : enable #else -#error "Half precision floating point not supportedby OpenCL implementation on your device." +#error "Half precision floating point not supportedby OpenCL implementation on your device." #endif #ifdef cl_khr_subgroups diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_cvt.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_cvt.cl index d0a0e7724..bf9ac7d1a 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_cvt.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_cvt.cl @@ -11,7 +11,7 @@ #elif defined(cl_amd_fp16) #pragma OPENCL EXTENSION cl_amd_fp16 : enable #else -#error "Half precision floating point not supportedby OpenCL implementation on your device." +#error "Half precision floating point not supportedby OpenCL implementation on your device." #endif #ifdef cl_khr_subgroups diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle.cl index aef74235b..2f5c41f55 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle.cl @@ -8,7 +8,7 @@ #pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable #pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable -// assume +// assume #define QK4_0 32 #define N_SIMDGROUP 4 @@ -204,7 +204,7 @@ __kernel void kernel_gemv_noshuffle( int ne10, // K int ne12, // 1 int ne0, // M - int ne1, // N + int ne1, // N int r2, // 1 int r3) { diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle_general.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle_general.cl index cb0f7853a..5326af3fe 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle_general.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_gemv_noshuffle_general.cl @@ -8,7 +8,7 @@ #pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable #pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable -// assume +// assume #define QK4_0 32 #define N_SIMDGROUP 4 @@ -204,7 +204,7 @@ __kernel void kernel_gemv_noshuffle( int ne10, // K int ne12, // 1 int ne0, // M - int ne1, // N + int ne1, // N int r2, // 1 int r3) { diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_mm.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_mm.cl index 37541e51f..91311ee73 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_mm.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_mm.cl @@ -10,7 +10,7 @@ #elif defined(cl_amd_fp16) #pragma OPENCL EXTENSION cl_amd_fp16 : enable #else -#error "Half precision floating point not supportedby OpenCL implementation on your device." +#error "Half precision floating point not supportedby OpenCL implementation on your device." #endif #ifdef cl_khr_subgroups diff --git a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_transpose_16.cl b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_transpose_16.cl index 550b4c71a..a2a9ab558 100644 --- a/ggml/src/ggml-opencl2/kernels/ggml-opencl2_transpose_16.cl +++ b/ggml/src/ggml-opencl2/kernels/ggml-opencl2_transpose_16.cl @@ -4,9 +4,9 @@ // 16-bit transpose, loading/storing an 8x8 tile of elements kernel void kernel_transpose_16( - __read_only image1d_buffer_t input, - __write_only image1d_buffer_t output, - const uint rows, + __read_only image1d_buffer_t input, + __write_only image1d_buffer_t output, + const uint rows, const uint cols ) {