Remove the always-true fp16_performance_good flag so fp16 no longer replaces fp32 in ggml_sycl_mul_mat
This commit is contained in:
parent
8eb0549fd0
commit
d0e9e0e14d
1 changed file with 4 additions and 7 deletions
|
@@ -15258,23 +15258,20 @@ static void ggml_sycl_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
|
||||||
bool use_mul_mat_q = ggml_sycl_supports_mmq(src0->type)
|
bool use_mul_mat_q = ggml_sycl_supports_mmq(src0->type)
|
||||||
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;
|
&& src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32;
|
||||||
|
|
||||||
// fp16 performance always better on gen12+
|
|
||||||
const bool fp16_performance_good = true;
|
|
||||||
|
|
||||||
// mmvq and mmq need the __dp4a instruction which is available for gen12+
|
// mmvq and mmq need the __dp4a instruction which is available for gen12+
|
||||||
// Workaround in https://github.com/ggerganov/llama.cpp/commit/95f84d5ce8b449a9b16009434aca800df504a02e
|
// Workaround in https://github.com/ggerganov/llama.cpp/commit/95f84d5ce8b449a9b16009434aca800df504a02e
|
||||||
use_mul_mat_q = use_mul_mat_q && (src0->type != GGML_TYPE_IQ2_XXS);
|
use_mul_mat_q = use_mul_mat_q && (src0->type != GGML_TYPE_IQ2_XXS);
|
||||||
#ifdef SYCL_USE_XMX
|
#ifdef SYCL_USE_XMX
|
||||||
use_mul_mat_q = use_mul_mat_q && (!fp16_performance_good || src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
|
use_mul_mat_q = use_mul_mat_q && (src1->ne[1] <= MMQ_MAX_BATCH_SIZE);
|
||||||
#endif // SYCL_USE_XMX
|
#endif // SYCL_USE_XMX
|
||||||
|
|
||||||
if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
if (!split && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
|
||||||
// KQ single-batch
|
// KQ single-batch
|
||||||
ggml_sycl_mul_mat_vec_p021(src0, src1, dst);
|
ggml_sycl_mul_mat_vec_p021(src0, src1, dst);
|
||||||
} else if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
|
} else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
|
||||||
// KQV single-batch
|
// KQV single-batch
|
||||||
ggml_sycl_mul_mat_vec_nc(src0, src1, dst);
|
ggml_sycl_mul_mat_vec_nc(src0, src1, dst);
|
||||||
} else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || fp16_performance_good) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
|
} else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
|
||||||
// KQ + KQV multi-batch
|
// KQ + KQV multi-batch
|
||||||
ggml_sycl_mul_mat_batched_sycl(src0, src1, dst);
|
ggml_sycl_mul_mat_batched_sycl(src0, src1, dst);
|
||||||
} else if (use_dequantize_mul_mat_vec) {
|
} else if (use_dequantize_mul_mat_vec) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue