WIP: Add support for rwkv v7

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
Molly Sophia 2025-01-15 20:43:23 +08:00
parent 5445300758
commit 6dcc21e7f5
14 changed files with 952 additions and 48 deletions

View file

@ -973,6 +973,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"GET_REL_POS",
"ADD_REL_POS",
"RWKV_WKV6",
"RWKV_WKV7",
"GATED_LINEAR_ATTN",
"UNARY",
@ -993,7 +994,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"OPT_STEP_ADAMW",
};
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@ -1071,6 +1072,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"get_rel_pos(x)",
"add_rel_pos(x)",
"rwkv_wkv6(k, v, r, tf, td, s)",
"rwkv_wkv7(r, w, k, v, a, b)",
"gated_linear_attn(k, v, q, gate, s)",
"unary(x)",
@ -1091,7 +1093,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"adamw(x)",
};
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@ -4705,6 +4707,54 @@ struct ggml_tensor * ggml_rwkv_wkv6(
return result;
}
// ggml_rwkv_wkv7

// Creates a GGML_OP_RWKV_WKV7 node whose sources are, in order,
// r, w, k, v, a, b and state. This function only allocates the result tensor
// and records the operands; it performs no computation itself.
//
// Requirements (all enforced with GGML_ASSERT):
//   - every input tensor is contiguous;
//   - r, w, v, a and b have the same first three dimensions as k, i.e.
//     { S, H, n_tokens } with S = k->ne[0], H = k->ne[1], n_tokens = k->ne[2];
//   - state holds exactly S * S * H * n_seqs elements, n_seqs = state->ne[1].
//
// Returns a new F32 tensor of shape { S * H, n_tokens + S * n_seqs, 1, 1 }.
// The row count leaves room for n_tokens rows of output plus S * n_seqs rows
// (= S * S * H * n_seqs elements) of updated state.
// NOTE(review): the exact layout inside the result is defined by the backend
// kernels, which are not visible here - confirm before relying on it.
struct ggml_tensor * ggml_rwkv_wkv7(
        struct ggml_context * ctx,
        struct ggml_tensor  * r,
        struct ggml_tensor  * w,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(w));
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(b));
    GGML_ASSERT(ggml_is_contiguous(state));

    // k is the reference operand: every other per-token input must match it.
    const int64_t S        = k->ne[0];
    const int64_t H        = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs   = state->ne[1];
    {
        // r must be validated as well; checking k against the values that were
        // just read from k would always succeed and leave r unchecked.
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_RWKV_WKV7;
    result->src[0] = r;
    result->src[1] = w;
    result->src[2] = k;
    result->src[3] = v;
    result->src[4] = a;
    result->src[5] = b;
    result->src[6] = state;

    return result;
}
// ggml_gated_linear_attn
struct ggml_tensor * ggml_gated_linear_attn(