vulkan : add dev notes

This commit is contained in:
Georgi Gerganov 2024-05-10 15:56:25 +03:00
parent 536983b1ad
commit 397b1f8f9d
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 11 additions and 1 deletion

View file

@@ -1559,12 +1559,19 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
case GGML_OP_SOFT_MAX: case GGML_OP_SOFT_MAX:
{ {
float scale; float scale;
memcpy(&scale, dst->op_params, sizeof(float)); float max_bias;
memcpy(&scale, (float *)dst->op_params + 0, sizeof(float));
memcpy(&max_bias, (float *)dst->op_params + 1, sizeof(float));
#pragma message("TODO: add ggml_vk_soft_max() F16 src1 support") #pragma message("TODO: add ggml_vk_soft_max() F16 src1 support")
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5021") #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5021")
GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32); GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32);
#pragma message("TODO: add ALiBi support")
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/7192")
GGML_ASSERT(max_bias == 0.0f);
ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale); ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale);
} break; } break;
case GGML_OP_DIAG_MASK_INF: case GGML_OP_DIAG_MASK_INF:

View file

@@ -4285,6 +4285,9 @@ static void ggml_vk_soft_max(ggml_backend_vk_context * ctx, vk_context * subctx,
const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
#pragma message("TODO: src2 is no longer used in soft_max - should be removed and ALiBi calculation should be updated")
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/7192")
ggml_vk_op_f32<vk_op_soft_max_push_constants>(ctx, subctx, src0, src1, src2, dst, GGML_OP_SOFT_MAX, { ggml_vk_op_f32<vk_op_soft_max_push_constants>(ctx, subctx, src0, src1, src2, dst, GGML_OP_SOFT_MAX, {
ncols, ncols,
src1 != nullptr ? nrows_y : (uint32_t)0, src1 != nullptr ? nrows_y : (uint32_t)0,