ggml : ggml_rope now takes a vector with positions instead of n_past
This commit is contained in:
parent
3b4bab6a38
commit
1fb033fd85
9 changed files with 270 additions and 131 deletions
|
@ -556,6 +556,14 @@ struct ggml_tensor * forward(
|
|||
struct ggml_tensor * kc = kv_self.k;
|
||||
struct ggml_tensor * vc = kv_self.v;
|
||||
|
||||
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||
{
|
||||
int * data = (int *) KQ_pos->data;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
data[i] = n_past + i;
|
||||
}
|
||||
}
|
||||
|
||||
// inpL shape [n_embd,N,1,1]
|
||||
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens);
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
|
@ -583,8 +591,8 @@ struct ggml_tensor * forward(
|
|||
// wk shape [n_embd, n_embd, 1, 1]
|
||||
// Qcur shape [n_embd/n_head, n_head, N, 1]
|
||||
// Kcur shape [n_embd/n_head, n_head, N, 1]
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), KQ_pos, n_rot, 0, 0);
|
||||
|
||||
// store key and value to memory
|
||||
{
|
||||
|
@ -810,9 +818,18 @@ struct ggml_tensor * forward_batch(
|
|||
struct ggml_tensor * kc = kv_self.k;
|
||||
struct ggml_tensor * vc = kv_self.v;
|
||||
|
||||
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||
{
|
||||
int * data = (int *) KQ_pos->data;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
data[i] = n_past + i;
|
||||
}
|
||||
}
|
||||
|
||||
// inpL shape [n_embd,N*n_batch,1]
|
||||
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens);
|
||||
assert_shape_2d(inpL, n_embd, N*n_batch);
|
||||
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
struct ggml_tensor * inpSA = inpL;
|
||||
|
||||
|
@ -840,8 +857,8 @@ struct ggml_tensor * forward_batch(
|
|||
// wk shape [n_embd, n_embd, 1, 1]
|
||||
// Qcur shape [n_embd/n_head, n_head, N, n_batch]
|
||||
// Kcur shape [n_embd/n_head, n_head, N, n_batch]
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
|
||||
struct ggml_tensor * Qcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), KQ_pos, n_rot, 0, 0);
|
||||
assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
|
||||
assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);
|
||||
|
||||
|
@ -1100,6 +1117,14 @@ struct ggml_tensor * forward_lora(
|
|||
struct ggml_tensor * kc = kv_self.k;
|
||||
struct ggml_tensor * vc = kv_self.v;
|
||||
|
||||
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||
{
|
||||
int * data = (int *) KQ_pos->data;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
data[i] = n_past + i;
|
||||
}
|
||||
}
|
||||
|
||||
// inpL shape [n_embd,N,1,1]
|
||||
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model->tok_embeddings, tokens);
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
|
@ -1133,7 +1158,7 @@ struct ggml_tensor * forward_lora(
|
|||
model->layers[il].wqb,
|
||||
cur)),
|
||||
n_embd/n_head, n_head, N),
|
||||
n_past, n_rot, 0, 0);
|
||||
KQ_pos, n_rot, 0, 0);
|
||||
struct ggml_tensor * Kcur = ggml_rope(ctx0,
|
||||
ggml_reshape_3d(ctx0,
|
||||
ggml_mul_mat(ctx0,
|
||||
|
@ -1142,7 +1167,7 @@ struct ggml_tensor * forward_lora(
|
|||
model->layers[il].wkb,
|
||||
cur)),
|
||||
n_embd/n_head, n_head, N),
|
||||
n_past, n_rot, 0, 0);
|
||||
KQ_pos, n_rot, 0, 0);
|
||||
|
||||
// store key and value to memory
|
||||
{
|
||||
|
|
|
@ -679,15 +679,23 @@ struct ggml_tensor * llama_build_train_graphs(
|
|||
}
|
||||
};
|
||||
|
||||
// KQ_pos - contains the positions
|
||||
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
|
||||
{
|
||||
int * data = (int *) KQ_pos->data;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
data[i] = n_past + i;
|
||||
}
|
||||
}
|
||||
|
||||
// rope has so much parameters that we make a custom function for it
|
||||
auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
|
||||
auto rope = [ctx, KQ_pos, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
|
||||
(struct ggml_tensor * t) -> struct ggml_tensor * {
|
||||
// not capturing these, to silcence warnings
|
||||
const int n_past = 0;
|
||||
const int rope_mode = 0;
|
||||
|
||||
return ggml_rope_custom(ctx,
|
||||
t, n_past, n_rot, rope_mode, n_ctx,
|
||||
t, KQ_pos, n_rot, rope_mode, n_ctx,
|
||||
rope_freq_base, rope_freq_scale);
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue