From a266c26de2030b94f608510ba0e70888a9881b76 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 1 Jun 2023 21:27:24 +0300
Subject: [PATCH] mtl : verify V tensor contents

---
 examples/mtl/mtl.m | 23 +++++++++++++++--------
 llama.cpp          | 27 +++++++++++++++++++--------
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/examples/mtl/mtl.m b/examples/mtl/mtl.m
index bb0074a4c..24f9479ce 100644
--- a/examples/mtl/mtl.m
+++ b/examples/mtl/mtl.m
@@ -752,19 +752,26 @@ int llama_mtl_eval(
             }
             printf("sum: %f\n", sum);
         } else if (t->type == GGML_TYPE_F16) {
-            const ggml_fp16_t * data = (const ggml_fp16_t *) ctx->out.contents;
+            ggml_fp16_t * data = (ggml_fp16_t *) ctx->out.contents;
             printf("data: ");
-            int n = ggml_nelements(t);
-            if (n > 10) {
-                n = 10;
-            }
-            for (int i = 0; i < n; i++) {
+            for (int i = 0; i < (int) t->ne[0]; i++) {
                 printf("%f ", ggml_fp16_to_fp32(data[i]));
             }
             printf("\n");
             double sum = 0.0;
-            for (int i = 0; i < ggml_nelements(t); i++) {
-                sum += ggml_fp16_to_fp32(data[i]);
+            printf("nb: %lld %lld %lld %lld\n", t->nb[0], t->nb[1], t->nb[2], t->nb[3]);
+            for (int64_t i3 = 0; i3 < t->ne[3]; ++i3) {
+                for (int64_t i2 = 0; i2 < t->ne[2]; ++i2) {
+                    for (int64_t i1 = 0; i1 < t->ne[1]; ++i1) {
+                        for (int64_t i0 = 0; i0 < t->ne[0]; ++i0) {
+                            const size_t offs = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
+                            const ggml_fp16_t cur = *((ggml_fp16_t *)((char *) data + offs));
+                            const float curf = ggml_fp16_to_fp32(cur);
+                            if (isinf(curf)) continue;
+                            sum += curf;
+                        }
+                    }
+                }
             }
             printf("sum: %f\n", sum);
         } else {
diff --git a/llama.cpp b/llama.cpp
index 6825636c8..2cf5a36fc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1341,11 +1341,6 @@ static bool llama_eval_internal(
         struct ggml_tensor * KQ_soft_max = ggml_soft_max_inplace(ctx0, KQ_masked);
         ggml_set_name(KQ_soft_max, "KQ_soft_max");
 
-        // TODO: TMP !!!!
-        if (il == 0) {
-            ggml_set_name(KQ_soft_max, "mtl-check");
-        }
-
         // split cached V into n_head heads
         struct ggml_tensor * V =
             ggml_view_3d(ctx0, kv_self.v,
@@ -1355,6 +1350,11 @@ static bool llama_eval_internal(
                     il*n_ctx*ggml_element_size(kv_self.v)*n_embd);
         ggml_set_name(V, "V");
 
+        // TODO: TMP !!!!
+        if (il == 0) {
+            ggml_set_name(V, "mtl-check");
+        }
+
 #if 1
         struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ_soft_max);
         ggml_set_name(KQV, "KQV");
@@ -1479,13 +1479,24 @@ static bool llama_eval_internal(
     auto print_t_f16 = [&](struct ggml_tensor * t) {
         ggml_fp16_t * data = (ggml_fp16_t *)t->data;
         printf("data: ");
-        for (int i = 0; i < std::min((int) t->ne[0], 10); i++) {
+        for (int i = 0; i < (int) t->ne[0]; i++) {
             printf("%f ", ggml_fp16_to_fp32(data[i]));
         }
         printf("\n");
         double sum = 0.0;
-        for (int i = 0; i < ggml_nelements(t); i++) {
-            sum += ggml_fp16_to_fp32(data[i]);
+        printf("nb: %lld %lld %lld %lld\n", t->nb[0], t->nb[1], t->nb[2], t->nb[3]);
+        for (int64_t i3 = 0; i3 < t->ne[3]; ++i3) {
+            for (int64_t i2 = 0; i2 < t->ne[2]; ++i2) {
+                for (int64_t i1 = 0; i1 < t->ne[1]; ++i1) {
+                    for (int64_t i0 = 0; i0 < t->ne[0]; ++i0) {
+                        const size_t offs = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
+                        const ggml_fp16_t cur = *((ggml_fp16_t *)((char *) data + offs));
+                        const float curf = ggml_fp16_to_fp32(cur);
+                        if (isinf(curf)) continue;
+                        sum += curf;
+                    }
+                }
+            }
         }
         printf("sum: %f\n", sum);
     };
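
Note on the verification loops above (illustrative sketch, not part of the patch): they walk the tensor by its byte strides nb[0..3] instead of assuming a contiguous buffer, which is what makes the checksum meaningful for non-contiguous views such as the cached V tensor, and they skip infinite values, presumably so that masked -inf entries cannot poison the CPU-vs-Metal comparison. A minimal standalone version of the same traversal could look like this, assuming ggml.h is on the include path; sum_f16_strided is a hypothetical helper name:

#include <math.h>    // isinf
#include <stdint.h>  // int64_t
#include "ggml.h"    // struct ggml_tensor, ggml_fp16_t, ggml_fp16_to_fp32

// Sum an F16 tensor element by element using its byte strides (nb),
// so the walk stays correct for non-contiguous views; infinite values
// are skipped, mirroring the verification loops in the patch.
static double sum_f16_strided(const struct ggml_tensor * t) {
    const char * data = (const char *) t->data;
    double sum = 0.0;
    for (int64_t i3 = 0; i3 < t->ne[3]; ++i3) {
        for (int64_t i2 = 0; i2 < t->ne[2]; ++i2) {
            for (int64_t i1 = 0; i1 < t->ne[1]; ++i1) {
                for (int64_t i0 = 0; i0 < t->ne[0]; ++i0) {
                    const size_t offs = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0*t->nb[0];
                    const float cur = ggml_fp16_to_fp32(*(const ggml_fp16_t *)(data + offs));
                    if (isinf(cur)) continue;
                    sum += cur;
                }
            }
        }
    }
    return sum;
}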