mtl : verify V tensor contents

This commit is contained in:
Georgi Gerganov 2023-06-01 21:27:24 +03:00
parent f67c2d8cab
commit a266c26de2
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 34 additions and 16 deletions

View file

@@ -752,19 +752,26 @@ int llama_mtl_eval(
} }
printf("sum: %f\n", sum); printf("sum: %f\n", sum);
} else if (t->type == GGML_TYPE_F16) { } else if (t->type == GGML_TYPE_F16) {
const ggml_fp16_t * data = (const ggml_fp16_t *) ctx->out.contents; ggml_fp16_t * data = (const ggml_fp16_t *) ctx->out.contents;
printf("data: "); printf("data: ");
int n = ggml_nelements(t); for (int i = 0; i < (int) t->ne[0]; i++) {
if (n > 10) {
n = 10;
}
for (int i = 0; i < n; i++) {
printf("%f ", ggml_fp16_to_fp32(data[i])); printf("%f ", ggml_fp16_to_fp32(data[i]));
} }
printf("\n"); printf("\n");
double sum = 0.0; double sum = 0.0;
for (int i = 0; i < ggml_nelements(t); i++) { printf("nb: %lld %lld %lld %lld\n", t->nb[0], t->nb[1], t->nb[2], t->nb[3]);
sum += ggml_fp16_to_fp32(data[i]); for (int64_t i3 = 0; i3 < t->ne[3]; ++i3) {
for (int64_t i2 = 0; i2 < t->ne[2]; ++i2) {
for (int64_t i1 = 0; i1 < t->ne[1]; ++i1) {
for (int64_t i0 = 0; i0 < t->ne[0]; ++i0) {
const size_t offs = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
const ggml_fp16_t cur = *((ggml_fp16_t *)((char *) data + offs));
const float curf = ggml_fp16_to_fp32(cur);
if (isinf(curf)) continue;
sum += curf;
}
}
}
} }
printf("sum: %f\n", sum); printf("sum: %f\n", sum);
} else { } else {

View file

@@ -1341,11 +1341,6 @@ static bool llama_eval_internal(
struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked); struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked);
ggml_set_name(KQ_soft_max, "KQ_soft_max"); ggml_set_name(KQ_soft_max, "KQ_soft_max");
// TODO: TMP !!!!
if (il == 0) {
ggml_set_name(KQ_soft_max, "mtl-check");
}
// split cached V into n_head heads // split cached V into n_head heads
struct ggml_tensor * V = struct ggml_tensor * V =
ggml_view_3d(ctx0, kv_self.v, ggml_view_3d(ctx0, kv_self.v,
@@ -1355,6 +1350,11 @@ static bool llama_eval_internal(
il*n_ctx*ggml_element_size(kv_self.v)*n_embd); il*n_ctx*ggml_element_size(kv_self.v)*n_embd);
ggml_set_name(V, "V"); ggml_set_name(V, "V");
// TODO: TMP !!!!
if (il == 0) {
ggml_set_name(V, "mtl-check");
}
#if 1 #if 1
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max); struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
ggml_set_name(KQV, "KQV"); ggml_set_name(KQV, "KQV");
@@ -1479,13 +1479,24 @@ static bool llama_eval_internal(
auto print_t_f16 = [&](struct ggml_tensor * t) { auto print_t_f16 = [&](struct ggml_tensor * t) {
ggml_fp16_t * data = (ggml_fp16_t *)t->data; ggml_fp16_t * data = (ggml_fp16_t *)t->data;
printf("data: "); printf("data: ");
for (int i = 0; i < std::min((int) t->ne[0], 10); i++) { for (int i = 0; i < (int) t->ne[0]; i++) {
printf("%f ", ggml_fp16_to_fp32(data[i])); printf("%f ", ggml_fp16_to_fp32(data[i]));
} }
printf("\n"); printf("\n");
double sum = 0.0; double sum = 0.0;
for (int i = 0; i < ggml_nelements(t); i++) { printf("nb: %lld %lld %lld %lld\n", t->nb[0], t->nb[1], t->nb[2], t->nb[3]);
sum += ggml_fp16_to_fp32(data[i]); for (int64_t i3 = 0; i3 < t->ne[3]; ++i3) {
for (int64_t i2 = 0; i2 < t->ne[2]; ++i2) {
for (int64_t i1 = 0; i1 < t->ne[1]; ++i1) {
for (int64_t i0 = 0; i0 < t->ne[0]; ++i0) {
const size_t offs = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
const ggml_fp16_t cur = *((ggml_fp16_t *)((char *) data + offs));
const float curf = ggml_fp16_to_fp32(cur);
if (isinf(curf)) continue;
sum += curf;
}
}
}
} }
printf("sum: %f\n", sum); printf("sum: %f\n", sum);
}; };