[SYCL] Fix the sub group size of Intel (#8106)

* use warp_size macro for all sycl kernels
* fix mask of permute_sub_group_by_xor
* fix rms_norm with correct warp number
* fix rms_norm_f32/group_norm_f32
* move norm to norm.cpp file
* fix quantize bug
* fix mmvq's batch size
2024-07-02 02:16:00 +00:00 · 2024-07-02 02:16:00 +00:00 · d08c20edde
commit d08c20edde
parent 5fac350b9c
9 changed files with 587 additions and 509 deletions
--- a/ggml/src/ggml-sycl/norm.hpp
+++ b/ggml/src/ggml-sycl/norm.hpp
@@ -0,0 +1,35 @@
+//
+// MIT license
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: MIT
+//
+
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef GGML_SYCL_NORM_HPP
+#define GGML_SYCL_NORM_HPP
+
+#include "common.hpp"
+
+void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1,  // prototype only; presumably defined in norm.cpp -- confirm
+    ggml_tensor* dst, const float* src0_dd,  // NOTE(review): each *_dd presumably points at the float data of the like-named tensor -- confirm
+    const float* src1_dd, float* dst_dd,  // dst_dd is the only non-const float buffer, so presumably the output
+    const queue_ptr& main_stream);  // queue_ptr is not declared in this header; presumably comes from common.hpp
+
+void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,  // prototype only; presumably defined in norm.cpp -- confirm
+    const ggml_tensor* src1, ggml_tensor* dst,  // same parameter list as the other ggml_sycl_op_* prototypes in this header
+    const float* src0_dd, const float* src1_dd,  // NOTE(review): presumably the float data of src0 / src1 -- confirm
+    float* dst_dd,  // only non-const float buffer, so presumably the output
+    const queue_ptr& main_stream);  // queue_ptr is not declared in this header; presumably comes from common.hpp
+
+void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,  // prototype only; presumably defined in norm.cpp -- confirm
+    const ggml_tensor* src1, ggml_tensor* dst,  // same parameter list as the other ggml_sycl_op_* prototypes in this header
+    const float* src0_dd, const float* src1_dd,  // NOTE(review): presumably the float data of src0 / src1 -- confirm
+    float* dst_dd,  // only non-const float buffer, so presumably the output
+    const queue_ptr& main_stream);  // queue_ptr is not declared in this header; presumably comes from common.hpp
+
+#endif // GGML_SYCL_NORM_HPP