ggml : update ggml_soft_max_ext() CUDA, SYCL

2024-05-10 11:12:19 +03:00 · 2024-05-10 11:12:19 +03:00 · d0592d495d
commit d0592d495d
parent 7fdca3348c
4 changed files with 38 additions and 74 deletions
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1561,10 +1561,9 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
                        float scale;
                        memcpy(&scale, dst->op_params, sizeof(float));

-#pragma message("TODO: add ggml_vk_soft_max() F16/F32 src1 and src2 support")
+#pragma message("TODO: add ggml_vk_soft_max() F16 src1 support")
 #pragma message("ref:  https://github.com/ggerganov/llama.cpp/pull/5021")
                        GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32);
-                        GGML_ASSERT(src2 == nullptr);

                        ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale);
                    } break;