From cfa2cc1e403c156cf5f9c4429a2309702c6b841e Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Wed, 5 Feb 2025 13:33:46 +0530 Subject: [PATCH] Disable non-contiguous tensor support in norm kernels and add newline at the end of debug logs --- ggml/src/ggml-sycl/ggml-sycl.cpp | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 5a38556f3..ca92e966c 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -33,6 +33,7 @@ #include "common.hpp" #include "ggml-sycl/backend.hpp" #include "ggml-sycl/gemm.hpp" +#include "ggml.h" static bool g_sycl_loaded = false; int g_ggml_sycl_debug = 0; @@ -308,7 +309,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data, size_t offset, size_t size) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; @@ -333,7 +334,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor, void *data, size_t offset, size_t size) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; @@ -362,7 +363,7 @@ static bool ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *src, ggml_tensor *dst) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); if (ggml_backend_buffer_is_sycl(src->buffer)) { ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context; ggml_backend_sycl_buffer_context * dst_ctx = (ggml_backend_sycl_buffer_context *)dst->buffer->context; @@ -420,7 +421,7 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer, static void ggml_backend_sycl_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context; ggml_sycl_set_device(ctx->device); @@ -468,7 +469,7 @@ static const char * ggml_backend_sycl_buffer_type_get_name(ggml_backend_buffer_t static ggml_backend_buffer_t ggml_backend_sycl_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context; ggml_sycl_set_device(buft_ctx->device); const queue_ptr stream = buft_ctx->stream; @@ -712,7 +713,7 @@ static void * ggml_backend_sycl_split_buffer_get_base(ggml_backend_buffer_t buff static void ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context; @@ -796,7 +797,7 @@ static void ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data, size_t offset, size_t size) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); // split tensors must always be set in their entirety at once GGML_ASSERT(offset == 0); GGML_ASSERT(size == ggml_nbytes(tensor)); @@ -850,7 +851,7 @@ static void ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor *tensor, void *data, size_t offset, size_t size) try { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); // split tensors must always be set in their entirety at once GGML_ASSERT(offset == 0); GGML_ASSERT(size == ggml_nbytes(tensor)); @@ -901,7 +902,7 @@ catch (sycl::exception const &exc) { } static void ggml_backend_sycl_split_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); GGML_UNUSED(buffer); GGML_UNUSED(value); } @@ -1025,7 +1026,7 @@ static const char * ggml_backend_sycl_host_buffer_type_name(ggml_backend_buffer_ } static void ggml_backend_sycl_host_buffer_free_buffer(ggml_backend_buffer_t buffer) { - GGML_SYCL_DEBUG("[SYCL] call %s", __func__); + GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__); ggml_sycl_host_free(buffer->context); } @@ -3277,14 +3278,17 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: - case GGML_OP_NORM: case GGML_OP_ADD: case GGML_OP_ADD1: case GGML_OP_LOG: case GGML_OP_SUB: case GGML_OP_MUL: case GGML_OP_DIV: + return true; + case GGML_OP_NORM: + case GGML_OP_GROUP_NORM: case GGML_OP_RMS_NORM: + return ggml_is_contiguous(op->src[0]); case GGML_OP_SCALE: case GGML_OP_SQR: case GGML_OP_SQRT: @@ -3316,7 +3320,6 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_SUM_ROWS: case GGML_OP_ARGSORT: case GGML_OP_ACC: - case GGML_OP_GROUP_NORM: case GGML_OP_UPSCALE: case GGML_OP_PAD: case GGML_OP_LEAKY_RELU: