opencl: integrate backend dyn.load interface and fix compiler and format warnings
This commit is contained in:
parent
c1af4b72b7
commit
8ad0bb30df
10 changed files with 41 additions and 43 deletions
|
@ -30,8 +30,6 @@ GGML_BACKEND_API bool ggml_backend_is_opencl2(ggml_backend_t backend);
|
||||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_buffer_type(void);
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_buffer_type(void);
|
||||||
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_host_buffer_type(void);
|
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_opencl2_host_buffer_type(void);
|
||||||
|
|
||||||
GGML_BACKEND_API ggml_backend_t ggml_backend_reg_opencl2_init(const char * params, void * user_data);
|
|
||||||
|
|
||||||
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl2_reg(void);
|
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_opencl2_reg(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
@ -6,7 +6,7 @@ if (OpenCL_FOUND)
|
||||||
|
|
||||||
set(TARGET_NAME ggml-opencl2)
|
set(TARGET_NAME ggml-opencl2)
|
||||||
|
|
||||||
add_library(${TARGET_NAME}
|
ggml_add_backend_library(${TARGET_NAME}
|
||||||
ggml-opencl2.cpp
|
ggml-opencl2.cpp
|
||||||
../../include/ggml-opencl2.h)
|
../../include/ggml-opencl2.h)
|
||||||
target_link_libraries(${TARGET_NAME} PRIVATE ggml-base ${OpenCL_LIBRARIES})
|
target_link_libraries(${TARGET_NAME} PRIVATE ggml-base ${OpenCL_LIBRARIES})
|
||||||
|
|
|
@ -1,6 +1,12 @@
|
||||||
// SPDX-FileCopyrightText: Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved
|
// SPDX-FileCopyrightText: Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
#define CL_TARGET_OPENCL_VERSION 220
|
||||||
|
|
||||||
|
// suppress warnings in CL headers for GCC and Clang
|
||||||
|
#pragma GCC diagnostic ignored "-Wgnu-anonymous-struct"
|
||||||
|
#pragma GCC diagnostic ignored "-Woverlength-strings"
|
||||||
|
|
||||||
#include "ggml-opencl2.h"
|
#include "ggml-opencl2.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
#include "ggml-impl.h"
|
#include "ggml-impl.h"
|
||||||
|
@ -1237,10 +1243,6 @@ static void ggml_backend_opencl2_buffer_init_tensor(ggml_backend_buffer_t buffer
|
||||||
tensor->extra = extra;
|
tensor->extra = extra;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This should be removed. Keep it to make it easier to identify the backend
|
|
||||||
// when debugging until backend is removed from tensor.
|
|
||||||
tensor->backend = GGML_BACKEND_TYPE_GPU;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The optimized gemm and gemv kernels are used for large matrices without batch.
|
// The optimized gemm and gemv kernels are used for large matrices without batch.
|
||||||
|
@ -1938,18 +1940,7 @@ static struct ggml_backend_device_i ggml_backend_opencl2_device_i = {
|
||||||
/* .event_synchronize = */ NULL,
|
/* .event_synchronize = */ NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
// Backend registry
|
||||||
// Backend registration
|
|
||||||
//
|
|
||||||
|
|
||||||
GGML_API ggml_backend_t ggml_backend_reg_opencl2_init(const char * params, void * user_data) {
|
|
||||||
return ggml_backend_opencl2_init();
|
|
||||||
|
|
||||||
GGML_UNUSED(params);
|
|
||||||
GGML_UNUSED(user_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
// new API
|
|
||||||
|
|
||||||
static const char * ggml_backend_opencl2_reg_get_name(ggml_backend_reg_t reg) {
|
static const char * ggml_backend_opencl2_reg_get_name(ggml_backend_reg_t reg) {
|
||||||
return "OpenCL2";
|
return "OpenCL2";
|
||||||
|
@ -1986,6 +1977,7 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) {
|
||||||
|
|
||||||
if (!initialized) {
|
if (!initialized) {
|
||||||
reg = ggml_backend_reg {
|
reg = ggml_backend_reg {
|
||||||
|
/* .api_version = */ GGML_BACKEND_API_VERSION,
|
||||||
/* .iface = */ ggml_backend_opencl2_reg_i,
|
/* .iface = */ ggml_backend_opencl2_reg_i,
|
||||||
/* .context = */ NULL,
|
/* .context = */ NULL,
|
||||||
};
|
};
|
||||||
|
@ -2004,6 +1996,8 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) {
|
||||||
return ®
|
return ®
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GGML_BACKEND_DL_IMPL(ggml_backend_opencl2_reg)
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Debugging utils
|
// Debugging utils
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
@ -2921,13 +2915,11 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
|
||||||
// init CL objects
|
// init CL objects
|
||||||
// <--------------------------------------------> //
|
// <--------------------------------------------> //
|
||||||
cl_int status;
|
cl_int status;
|
||||||
cl_event evt;
|
|
||||||
cl_image_format img_fmt_1d;
|
cl_image_format img_fmt_1d;
|
||||||
cl_image_desc img_desc_1d;
|
cl_image_desc img_desc_1d;
|
||||||
cl_buffer_region region;
|
cl_buffer_region region;
|
||||||
cl_mem A_image1d;
|
cl_mem A_image1d;
|
||||||
cl_mem B_image1d;
|
cl_mem B_image1d;
|
||||||
cl_mem A_sub_buffer;
|
|
||||||
cl_mem B_sub_buffer;
|
cl_mem B_sub_buffer;
|
||||||
cl_mem C_d;
|
cl_mem C_d;
|
||||||
// for B transpose
|
// for B transpose
|
||||||
|
@ -3623,7 +3615,7 @@ static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const
|
||||||
GGML_ASSERT(src1->extra);
|
GGML_ASSERT(src1->extra);
|
||||||
|
|
||||||
// GGML_OP_CPY happens between src0 and src1.
|
// GGML_OP_CPY happens between src0 and src1.
|
||||||
// GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst.
|
// GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst.
|
||||||
UNUSED(dst);
|
UNUSED(dst);
|
||||||
|
|
||||||
const int ne00 = src0 ? src0->ne[0] : 0;
|
const int ne00 = src0 ? src0->ne[0] : 0;
|
||||||
|
|
|
@ -1,19 +1,27 @@
|
||||||
|
#
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
import logging
|
||||||
|
logger = logging.getLogger("opencl-embed-kernerl")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) != 3:
|
logging.basicConfig(level=logging.INFO)
|
||||||
print("Usage: python embed_kernel.py <input_file> <output_file>")
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
ifile = open(sys.argv[1], "r")
|
if len(sys.argv) != 3:
|
||||||
ofile = open(sys.argv[2], "w")
|
logger.info("Usage: python embed_kernel.py <input_file> <output_file>")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
ofile.write("R\"(\n\n")
|
ifile = open(sys.argv[1], "r")
|
||||||
ofile.write(ifile.read())
|
ofile = open(sys.argv[2], "w")
|
||||||
ofile.write("\n)\"")
|
|
||||||
|
ofile.write("R\"(\n\n")
|
||||||
|
ofile.write(ifile.read())
|
||||||
|
ofile.write("\n)\"")
|
||||||
|
|
||||||
|
ifile.close()
|
||||||
|
ofile.close()
|
||||||
|
|
||||||
ifile.close()
|
|
||||||
ofile.close()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#elif defined(cl_amd_fp16)
|
#elif defined(cl_amd_fp16)
|
||||||
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
||||||
#else
|
#else
|
||||||
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef cl_khr_subgroups
|
#ifdef cl_khr_subgroups
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
#elif defined(cl_amd_fp16)
|
#elif defined(cl_amd_fp16)
|
||||||
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
||||||
#else
|
#else
|
||||||
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef cl_khr_subgroups
|
#ifdef cl_khr_subgroups
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable
|
#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable
|
||||||
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
|
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
|
||||||
|
|
||||||
// assume
|
// assume
|
||||||
#define QK4_0 32
|
#define QK4_0 32
|
||||||
#define N_SIMDGROUP 4
|
#define N_SIMDGROUP 4
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ __kernel void kernel_gemv_noshuffle(
|
||||||
int ne10, // K
|
int ne10, // K
|
||||||
int ne12, // 1
|
int ne12, // 1
|
||||||
int ne0, // M
|
int ne0, // M
|
||||||
int ne1, // N
|
int ne1, // N
|
||||||
int r2, // 1
|
int r2, // 1
|
||||||
int r3)
|
int r3)
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable
|
#pragma OPENCL EXTENSION cl_qcom_extra_vector_types : enable
|
||||||
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
|
#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
|
||||||
|
|
||||||
// assume
|
// assume
|
||||||
#define QK4_0 32
|
#define QK4_0 32
|
||||||
#define N_SIMDGROUP 4
|
#define N_SIMDGROUP 4
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ __kernel void kernel_gemv_noshuffle(
|
||||||
int ne10, // K
|
int ne10, // K
|
||||||
int ne12, // 1
|
int ne12, // 1
|
||||||
int ne0, // M
|
int ne0, // M
|
||||||
int ne1, // N
|
int ne1, // N
|
||||||
int r2, // 1
|
int r2, // 1
|
||||||
int r3)
|
int r3)
|
||||||
{
|
{
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#elif defined(cl_amd_fp16)
|
#elif defined(cl_amd_fp16)
|
||||||
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
#pragma OPENCL EXTENSION cl_amd_fp16 : enable
|
||||||
#else
|
#else
|
||||||
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
#error "Half precision floating point not supportedby OpenCL implementation on your device."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef cl_khr_subgroups
|
#ifdef cl_khr_subgroups
|
||||||
|
|
|
@ -4,9 +4,9 @@
|
||||||
// 16-bit transpose, loading/storing an 8x8 tile of elements
|
// 16-bit transpose, loading/storing an 8x8 tile of elements
|
||||||
|
|
||||||
kernel void kernel_transpose_16(
|
kernel void kernel_transpose_16(
|
||||||
__read_only image1d_buffer_t input,
|
__read_only image1d_buffer_t input,
|
||||||
__write_only image1d_buffer_t output,
|
__write_only image1d_buffer_t output,
|
||||||
const uint rows,
|
const uint rows,
|
||||||
const uint cols
|
const uint cols
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue