[SYCL] Fix WARP_SIZE=16 bug of Intel GPU (#8266)

* fix group_norm ut

* split softmax

* fix softmax

* add concat support condition

* revert debug code

* move QK_WARP_SIZE to presets.hpp
This commit is contained in:
luoyu-intel 2024-07-05 05:06:13 +00:00 committed by GitHub
parent e235b267a2
commit a9554e20b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 301 additions and 257 deletions

View file

@ -0,0 +1,24 @@
//
// MIT license
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: MIT
//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
#ifndef GGML_SYCL_SOFTMAX_HPP
#define GGML_SYCL_SOFTMAX_HPP
#include "common.hpp"
void ggml_sycl_op_soft_max(ggml_backend_sycl_context &ctx, const ggml_tensor *src0,
const ggml_tensor *src1, ggml_tensor *dst,
const float *src0_dd, const float *src1_dd,
float *dst_dd,
const queue_ptr &main_stream);
#endif // GGML_SYCL_SOFTMAX_HPP