From a8c75c041df0d8805a11e54d1cf8b3f3454780c8 Mon Sep 17 00:00:00 2001 From: luoyu-intel Date: Tue, 16 Jul 2024 06:37:10 +0000 Subject: [PATCH] fix buf --- ggml/src/ggml-sycl.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-sycl.cpp b/ggml/src/ggml-sycl.cpp index 8d82a837e..a99adf0f8 100644 --- a/ggml/src/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl.cpp @@ -4341,6 +4341,9 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, if (tensor->type == GGML_TYPE_Q4_0) { auto tmp_buf = sycl::malloc_shared(size, *stream); + SYCL_CHECK( + CHECK_TRY_ERROR((*stream).memcpy(tmp_buf, data, size) + .wait())); GGML_ASSERT((size % sizeof(block_q4_0) == 0)); GGML_ASSERT((offset % sizeof(block_q4_0) == 0)); int blk_offset = offset / sizeof(block_q4_0); @@ -4349,9 +4352,9 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, size_t nrows = tensor->ne[1]; auto d_ptr = (sycl::half*)(qs_ptr + ncols * nrows / 2) + blk_offset; stream->parallel_for( - size / QK4_0, + size / sizeof(block_q4_0), [=](auto i) [[intel::reqd_sub_group_size(WARP_SIZE)]] { - const block_q4_0* x = (const block_q4_0*)data; + const block_q4_0* x = (const block_q4_0*)tmp_buf; int ib = i; typedef sycl::vec CT; CT tmp = *(CT*)x[ib].qs; @@ -4371,7 +4374,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer, } *(d_ptr + ib) = x[ib].d; - }); + }).wait(); sycl::free(tmp_buf, *stream); } else