llama : switch to floating-point token positions
ggml-ci
This commit is contained in:
parent
15499eb942
commit
fc775366f1
14 changed files with 68 additions and 61 deletions
|
@ -1134,14 +1134,15 @@ struct test_rope : public test_case {
|
|||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, ne[2]);
|
||||
ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne[2]);
|
||||
ggml_set_name(pos, "pos");
|
||||
ggml_tensor * out = ggml_rope(ctx, a, pos, n_dims, mode, n_ctx);
|
||||
return out;
|
||||
}
|
||||
|
||||
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
if (t->type == GGML_TYPE_I32) {
|
||||
if (strcmp(ggml_get_name(t), "pos") == 0) {
|
||||
// pos
|
||||
std::vector<int> data(ne[2]);
|
||||
for (int i = 0; i < ne[2]; i++) {
|
||||
|
@ -1703,7 +1704,7 @@ struct test_llama : public test_llm {
|
|||
inpL = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hp.n_embd, hp.n_tokens);
|
||||
|
||||
// inp_pos - contains the positions
|
||||
struct ggml_tensor * inp_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, hp.n_tokens);
|
||||
struct ggml_tensor * inp_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_tokens);
|
||||
|
||||
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
||||
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, hp.n_kv, hp.n_tokens, 1);
|
||||
|
@ -1825,7 +1826,7 @@ struct test_falcon : public test_llm {
|
|||
inpL = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hp.n_embd, hp.n_tokens);
|
||||
|
||||
// inp_pos - contains the positions
|
||||
struct ggml_tensor * inp_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, hp.n_tokens);
|
||||
struct ggml_tensor * inp_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_tokens);
|
||||
|
||||
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
||||
struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, hp.n_kv, hp.n_tokens, 1);
|
||||
|
|
|
@ -1449,9 +1449,9 @@ int main(int argc, const char ** argv) {
|
|||
for (int n_past = 1; n_past < ne2[2]; ++n_past) {
|
||||
x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
|
||||
struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
|
||||
struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, ne2[2]);
|
||||
for (int i = 0; i < ne2[2]; ++i) {
|
||||
((int32_t *) p->data)[i] = n_past + i;
|
||||
((float *) p->data)[i] = n_past + i;
|
||||
}
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
@ -1489,9 +1489,9 @@ int main(int argc, const char ** argv) {
|
|||
for (int n_past = 1; n_past < ne2[2]; ++n_past) {
|
||||
x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
|
||||
|
||||
struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
|
||||
struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, ne2[2]);
|
||||
for (int i = 0; i < ne2[2]; ++i) {
|
||||
((int32_t *) p->data)[i] = n_past + i;
|
||||
((float *) p->data)[i] = n_past + i;
|
||||
}
|
||||
|
||||
ggml_set_param(ctx0, x[0]);
|
||||
|
|
|
@ -146,14 +146,14 @@ int main(int /*argc*/, const char ** /*argv*/) {
|
|||
const int n_past_0 = 100;
|
||||
const int n_past_2 = 33;
|
||||
|
||||
struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
|
||||
struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
|
||||
struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
|
||||
struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, ne[2]);
|
||||
struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, ne[2]);
|
||||
struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, ne[2]);
|
||||
|
||||
for (int i = 0; i < ne[2]; ++i) {
|
||||
((int32_t *) p0->data)[i] = n_past_0 + i;
|
||||
((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
|
||||
((int32_t *) p2->data)[i] = n_past_2 + i;
|
||||
((float *) p0->data)[i] = n_past_0 + i;
|
||||
((float *) p1->data)[i] = n_past_2 - n_past_0;
|
||||
((float *) p2->data)[i] = n_past_2 + i;
|
||||
}
|
||||
|
||||
// test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue