From e26cd6b483c195a45c64e2f25c315269ada827a2 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sun, 4 Jun 2023 11:23:36 +0300
Subject: [PATCH] mtl : remove temp / debug code

---
 examples/mtl/mtl.cpp | 34 ++----------------------
 llama.cpp            | 63 +-------------------------------------------
 2 files changed, 3 insertions(+), 94 deletions(-)

diff --git a/examples/mtl/mtl.cpp b/examples/mtl/mtl.cpp
index e527f2856..56510904c 100644
--- a/examples/mtl/mtl.cpp
+++ b/examples/mtl/mtl.cpp
@@ -5,8 +5,6 @@
 #include <cstdio>
 #include <cstring>
 
-#include <vector> // tmp
-
 int main(int argc, char ** argv) {
     ggml_time_init();
 
@@ -24,44 +22,16 @@ int main(int argc, char ** argv) {
     struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
     gf.n_threads = 1;
 
-    int32_t n_vocab = 0;
-
-    {
-        struct ggml_tensor * t_vocab = ggml_graph_get_tensor(&gf, "vocab");
-        if (t_vocab == NULL) {
-            fprintf(stderr, "%s: vocab tensor not found\n", __func__);
-            return -1;
-        }
-
-        const char * ptr = (const char *) t_vocab->data;
-
-        memcpy(&n_vocab, ptr, sizeof(n_vocab)); ptr += sizeof(n_vocab);
-
-        printf("%s: n_vocab = %d\n", __func__, n_vocab);
-
-        for (int i = 0; i < 512; ++i) {
-            char text[32];
-            float score;
-
-            memcpy(text, ptr, sizeof(text)); ptr += sizeof(text);
-            memcpy(&score, ptr, sizeof(score)); ptr += sizeof(score);
-
-            printf("%s: token[%4d] = %16.*s, score = %6.2f\n", __func__, i, (int) sizeof(text), text, score);
-        }
-    }
-
     // this allocates all Metal resources and memory buffers
     auto * ctx_mtl = ggml_mtl_init();
 
     ggml_mtl_add_buffer(ctx_mtl, "data", ggml_get_mem_buffer(ctx_data), ggml_get_mem_size(ctx_data));
     ggml_mtl_add_buffer(ctx_mtl, "eval", ggml_get_mem_buffer(ctx_eval), ggml_get_mem_size(ctx_eval));
 
-    // TODO: tmp to match the input used when creating the cgraph
+    // main
     {
-        const std::vector<int> tmp(1, 1); // BOS
-
         struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "embd");
-        memcpy(input->data, tmp.data(), tmp.size() * sizeof(int));
+        *(int32_t *) input->data = 1; // BOS
 
         ggml_mtl_set_tensor(ctx_mtl, input);
 
diff --git a/llama.cpp b/llama.cpp
index 26722e091..455402a4e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1459,68 +1459,7 @@ static bool llama_eval_internal(
 #endif
 
     if (cgraph_fname) {
-        // TODO: tmp add the vocabulary as a leaf to the computation graph, until better approach is found
-        {
-            char tmp[32]; // max token length
-
-            // store null-terminated string for simplicity
-            std::vector<uint8_t> buf_vocab(sizeof(int32_t) + n_vocab*(32 + sizeof(float)));
-
-            uint64_t offs = 0;
-
-            {
-                const int32_t n = n_vocab;
-                memcpy(&buf_vocab[offs], &n, sizeof(n)); offs += sizeof(n);
-            }
-
-            for (int i = 0; i < n_vocab; i++) {
-                const int32_t id = i;
-
-                const float score = lctx.vocab.id_to_token[id].score;
-                const std::string text = lctx.vocab.id_to_token[id].tok;
-
-                snprintf(tmp, sizeof(tmp), "%s", text.c_str());
-
-                memcpy(&buf_vocab[offs], tmp, 32); offs += 32;
-                memcpy(&buf_vocab[offs], &score, sizeof(score)); offs += sizeof(score);
-            }
-
-            struct ggml_init_params params;
-            params.mem_size   = ggml_tensor_overhead();
-            params.mem_buffer = NULL;
-            params.no_alloc   = true;
-
-            ggml_context * ctx_vocab = ggml_init(params);
-
-            struct ggml_tensor * t_vocab = ggml_new_tensor_1d(ctx_vocab, GGML_TYPE_I8, buf_vocab.size());
-            t_vocab->data = buf_vocab.data();
-            ggml_set_name(t_vocab, "vocab");
-
-            gf.leafs[gf.n_leafs++] = t_vocab;
-
-            ggml_graph_export(&gf, cgraph_fname);
-
-            ggml_free(ctx_vocab);
-        }
-
-        float * logits = (float *) ggml_get_data(cur);
-
-        printf("logits: ");
-        for (int i = 0; i < 10; i++) {
-            printf("%8.4f ", logits[i]);
-        }
-        printf("\n");
-        double sum = 0.0;
-        int imax = 0;
-        double vmax = -INFINITY;
-        for (int i = 0; i < 32000; i++) {
-            sum += (double) logits[i];
-            if (logits[i] > vmax) {
-                vmax = logits[i];
-                imax = i;
-            }
-        }
-        printf("sum: %f, imax = %d, vmax = %f\n", sum, imax, vmax);
+        ggml_graph_export(&gf, cgraph_fname);
     }
 
 #ifdef GGML_PERF