From ec2b03e5049c2e64dd7ff949414ba9c3f984ca0c Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Sat, 25 Nov 2023 20:06:00 -0500
Subject: [PATCH] now printing tensors

---
 binding.py                 |   4 +-
 examples/main/main.cpp     |   3 +-
 examples/server/server.cpp |   1 +
 ggml.cpp                   |  22 ++++-
 llama-internal.hpp         |   6 ++
 print.hpp                  | 162 +++++++++++++++++++++++++------------
 6 files changed, 140 insertions(+), 58 deletions(-)

diff --git a/binding.py b/binding.py
index 217dce684..dcf4c37ef 100644
--- a/binding.py
+++ b/binding.py
@@ -14,9 +14,9 @@ llvmLibPath = "/usr/lib/llvm-15/lib/"
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"
 
 fileList = [
-#    "ggml.cpp",
+    "ggml.cpp",
 #    "llama.cpp",
-    "examples/server/server.cpp",
+#    "examples/server/server.cpp",
 ]
 
 typeList = [
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 18d2d03c0..d3c6cb4f5 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 static llama_context ** g_ctx;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index aace61c57..b202bafe0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 #ifndef SERVER_VERBOSE
diff --git a/ggml.cpp b/ggml.cpp
index f1a0e5358..804689931 100644
--- a/ggml.cpp
+++ b/ggml.cpp
@@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"
 
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -46,6 +47,12 @@
 void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();
+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"
+
 #if defined(_WIN32)
 #include <windows.h>
 
@@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-
+    print_fields(*params);
+    print_fields(*src0);
+    print_fields(*src1);
+    print_fields(*dst);
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -9456,6 +9466,7 @@
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         }
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9463,10 +9474,12 @@
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         if (params->ith != 0) {
+            print_fields(*dst);
             return;
         }
 
         if (params->type == GGML_TASK_INIT) {
+
             return;
         }
 
@@ -9508,7 +9521,7 @@
         }
 
         //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9527,11 +9540,12 @@
                 }
             }
         }
-
+        print_fields(*dst);
         return;
     }
 
     if (params->type == GGML_TASK_FINALIZE) {
+        print_fields(*dst);
         return;
     }
 
@@ -9565,6 +9579,7 @@
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
         sched_yield();
+
         return;
     }
 
@@ -9617,6 +9632,7 @@
             }
         }
     }
+    print_fields(*dst);
 }
 
 // ggml_compute_forward_out_prod
diff --git a/llama-internal.hpp b/llama-internal.hpp
index 33cf39e5d..4c054677f 100644
--- a/llama-internal.hpp
+++ b/llama-internal.hpp
@@ -1,5 +1,10 @@
 #include
 #include
+#include
+#include
+#include
+#include
+#include
 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_FALCON,
@@ -451,6 +456,7 @@ struct llama_model {
     }
 };
 
+struct ggml_allocr;
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context();
diff --git a/print.hpp b/print.hpp
index 94688a1dc..2ab13c21b 100644
--- a/print.hpp
+++ b/print.hpp
@@ -1,8 +1,9 @@
 #include
 #include
 #include "llama.h"
-#include "ggml-internal.hpp"
-#include "llama-internal.hpp"
+#include "common/common.h"
+//#include "ggml-internal.hpp"
+//#include "llama-internal.hpp"
 
 REFL_TYPE(ggml_init_params )
 REFL_END
@@ -96,9 +97,10 @@ REFL_END
 REFL_TYPE(llama_sampling_params)
 REFL_END
 
+#ifdef llm_arch
 REFL_TYPE(llm_arch)
 REFL_END
-
+#endif
 REFL_TYPE(llama_sampling_context )
 REFL_FIELD( params)
 REFL_FIELD( mirostat_mu)
@@ -125,10 +127,34 @@ REFL_END
 
 REFL_TYPE(ggml_tensor)
     REFL_FIELD(type)
+    REFL_FIELD(type)
+    REFL_FIELD(backend)
+    REFL_FIELD(buffer)
+    REFL_FIELD(n_dims)
+    REFL_FIELD(ne)
+    REFL_FIELD(nb)
+    REFL_FIELD(op)
+    REFL_FIELD(op_params)
+    REFL_FIELD(is_param)
+    REFL_FIELD(grad)
+    REFL_FIELD(src)
+    REFL_FIELD(perf_runs)
+    REFL_FIELD(perf_cycles)
+    REFL_FIELD(perf_time_us)
+    REFL_FIELD(view_src)
+    REFL_FIELD(view_offs)
+    REFL_FIELD(data)
+    REFL_FIELD(name)
+    REFL_FIELD(extra)
+    REFL_FIELD(padding)
 REFL_END
 
 REFL_TYPE(ggml_cplan)
     REFL_FIELD(work_size)
+    REFL_FIELD(work_data)
+    REFL_FIELD(n_threads)
+    REFL_FIELD(abort_callback)
+    REFL_FIELD(abort_callback_data)
 REFL_END
 
 REFL_TYPE(ggml_hash_set)
@@ -137,14 +163,32 @@ REFL_END
 
 REFL_TYPE(ggml_cgraph)
     REFL_FIELD(size)
+    REFL_FIELD(n_nodes)
+    REFL_FIELD(n_leafs)
+    REFL_FIELD(nodes)
+    REFL_FIELD(grads)
+    REFL_FIELD(leafs)
+    REFL_FIELD(visited_hash_table)
+    REFL_FIELD(order)
+    REFL_FIELD(perf_runs)
+    REFL_FIELD(perf_cycles)
+    REFL_FIELD(perf_time_us)
 REFL_END
 
 REFL_TYPE(ggml_scratch)
     REFL_FIELD(offs)
+    REFL_FIELD(size)
+    REFL_FIELD(data)
+
 REFL_END
 
 REFL_TYPE(ggml_compute_params)
     REFL_FIELD(type)
+    REFL_FIELD(ith)
+    REFL_FIELD(nth)
+    REFL_FIELD(wsize)
+    REFL_FIELD(wdata)
+
 REFL_END
 
 REFL_TYPE(ggml_opt_params)
@@ -162,6 +206,7 @@ REFL_TYPE(ggml_something)
 REFL_FIELD(type_name)
 REFL_END
 
+#ifdef ggml_context
 REFL_TYPE(ggml_context)
 REFL_FIELD(mem_size)
 REFL_FIELD(mem_buffer)
@@ -173,14 +218,17 @@ REFL_FIELD( objects_begin)
 REFL_FIELD( objects_end)
 REFL_FIELD( scratch)
 REFL_FIELD( scratch_save)
-
 REFL_END
+#endif
 
+#ifdef ggml_context_container
 REFL_TYPE(ggml_context_container)
 REFL_FIELD(used)
 REFL_FIELD(context)
 REFL_END
+#endif
 
+#ifdef ggml_numa_node
 REFL_TYPE(ggml_numa_node)
 REFL_FIELD(cpus)
 REFL_FIELD(n_cpus)
@@ -220,6 +268,7 @@ REFL_TYPE(hash_map)
 REFL_FIELD(set)
 REFL_FIELD(vals)
 REFL_END
+
 REFL_TYPE(ggml_compute_state_shared)
 REFL_FIELD(cgraph)
 REFL_FIELD(cplan)
@@ -232,12 +281,15 @@ REFL_TYPE(ggml_lbfgs_iteration_data)
 REFL_FIELD(alpha)
 REFL_FIELD(ys)
 REFL_END
+#endif
 
+#ifdef gguf_kv
 REFL_TYPE(gguf_kv)
 REFL_FIELD(key)
 REFL_FIELD(type)
 REFL_END
+
 
 REFL_TYPE(gguf_header)
 REFL_FIELD(magic)
 REFL_FIELD(version)
@@ -257,7 +309,7 @@ REFL_TYPE(gguf_buf)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
-
+#endif
 
 REFL_TYPE(llama_model_params)
 REFL_FIELD(n_gpu_layers)
@@ -265,6 +317,7 @@ REFL_END
 REFL_TYPE(llama_context_params)
 REFL_FIELD(seed)
 REFL_END
+
 REFL_TYPE(llama_model_quantize_params)
 REFL_FIELD(nthread)
 REFL_END
@@ -282,13 +335,16 @@ REFL_END
 REFL_TYPE(llama_beams_state)
 REFL_FIELD(beam_views)
 REFL_END
-
+
+#ifdef ggml_backend
 REFL_TYPE(ggml_backend)
 REFL_END
+#endif
 
 REFL_TYPE(ggml_backend_buffer)
 REFL_END
 
+#ifdef ggml_allocr
 REFL_TYPE(ggml_allocr)
 REFL_END
 
@@ -298,13 +354,14 @@ REFL_END
 
 REFL_TYPE(ggml_gallocr)
 REFL_END
+#endif
 
+#ifdef llama_buffer
 REFL_TYPE(llama_buffer)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
-
 REFL_TYPE(llama_file)
 REFL_FIELD(fp)
 REFL_FIELD(size)
 
@@ -353,91 +410,92 @@ REFL_TYPE(llama_kv_cache)
 REFL_FIELD(has_shift)
 REFL_FIELD(head)
 REFL_END
+#endif
 
+#ifdef e_model
 REFL_TYPE(e_model)
 REFL_END
+#endif
 
 REFL_TYPE(llama_ftype)
 REFL_END
 
+//#ifdef llama_model
 REFL_TYPE(llama_model)
-    REFL_FIELD(type)
-    REFL_FIELD(arch)
+REFL_FIELD(type)
+REFL_FIELD(arch)
 REFL_FIELD(ftype )
-
-REFL_FIELD( name )
-
-    REFL_FIELD( hparams )
-REFL_FIELD( vocab)
-
-REFL_FIELD( tok_embd)
-REFL_FIELD( pos_embd)
-REFL_FIELD( tok_norm)
-REFL_FIELD( tok_norm_b)
-
-REFL_FIELD( output_norm)
-REFL_FIELD( output_norm_b)
-REFL_FIELD( output)
-
-REFL_FIELD( layers)
-
-REFL_FIELD( n_gpu_layers)
-
-    REFL_FIELD( gguf_kv) //unordered map
-    REFL_FIELD( ctx)
-    REFL_FIELD( buf)
-    REFL_FIELD( mapping) //std::unique_ptr
-REFL_FIELD( mlock_buf)
-REFL_FIELD( mlock_mmap)
-REFL_FIELD( tensors_by_name)
-    REFL_FIELD( t_load_us)
-REFL_FIELD( t_start_us)
-
+REFL_FIELD(name )
+REFL_FIELD(hparams )
+REFL_FIELD(vocab)
+REFL_FIELD(tok_embd)
+REFL_FIELD(pos_embd)
+REFL_FIELD(tok_norm)
+REFL_FIELD(tok_norm_b)
+REFL_FIELD(output_norm)
+REFL_FIELD(output_norm_b)
+REFL_FIELD(output)
+REFL_FIELD(layers)
+REFL_FIELD(n_gpu_layers)
+REFL_FIELD(gguf_kv) //unordered map
+REFL_FIELD(ctx)
+REFL_FIELD(buf)
+REFL_FIELD(mapping) //std::unique_ptr
+REFL_FIELD(mlock_buf)
+REFL_FIELD(mlock_mmap)
+REFL_FIELD(tensors_by_name)
+REFL_FIELD(t_load_us)
+REFL_FIELD(t_start_us)
 REFL_END
+//#endif
 
+#ifdef llama_vocab
 REFL_TYPE(llama_vocab)
 REFL_END
-
-    REFL_TYPE(grammar_parser::parse_state)
-    REFL_END
-
+#endif
+
+REFL_TYPE(grammar_parser::parse_state)
+REFL_END
+
+//#ifdef llama_context
 REFL_TYPE(llama_context)
 REFL_FIELD( cparams)
 //REFL_FIELD(model)
 REFL_FIELD(kv_self)
-    REFL_FIELD(rng) //random numbers
+REFL_FIELD(rng) //random numbers
 REFL_FIELD(has_evaluated_once )
 REFL_FIELD( t_start_us)
 REFL_FIELD( t_load_us)
-    REFL_FIELD( t_sample_us )
+REFL_FIELD( t_sample_us )
 REFL_FIELD( t_p_eval_us )
-    REFL_FIELD( t_eval_us)
+REFL_FIELD( t_eval_us)
 REFL_FIELD( n_sample )
 REFL_FIELD( n_p_eval )
-    REFL_FIELD( n_eval )
+REFL_FIELD( n_eval )
 //REFL_FIELD( logits) crash
 REFL_FIELD( logits_all )
 REFL_FIELD( embedding)
 //REFL_FIELD( work_buffer)
-    REFL_FIELD( buf_compute)
-    REFL_FIELD( buf_alloc)
+REFL_FIELD( buf_compute)
+REFL_FIELD( buf_alloc)
 REFL_FIELD( alloc )
-
 #ifdef GGML_USE_METAL
 REFL_FIELD( ctx_metal )
 #endif
-
 #ifdef GGML_USE_MPI
 REFL_FIELD( ctx_mpi )
-
 #endif
 REFL_END
+//#endif
 
+#ifdef llama_model_loader
 REFL_TYPE(llama_model_loader)
 REFL_FIELD(n_kv)
 REFL_FIELD(n_tensors)
 REFL_END
+#endif
 
+#ifdef llm_build_context
 REFL_TYPE(llm_build_context)
 // REFL_FIELD(model) cannot create pointer to reference member ‘llm_build_context::model’
 // REFL_FIELD(hparams) cannot create pointer to reference member ‘llm_build_context::hparams’
@@ -511,7 +569,6 @@ REFL_FIELD( n_feed_forward_w2 )
 REFL_FIELD( i_feed_forward_w2 )
 REFL_FIELD( n_k_quantized )
 REFL_FIELD( n_fallback )
-
 REFL_END
 
 REFL_TYPE(llama_data_context)
@@ -524,6 +581,7 @@ REFL_END
 REFL_TYPE(llama_data_file_context)
 REFL_FIELD(file)
 REFL_END
+#endif
 
 template <typename T>
 constexpr auto get_value_type_name(const T t) noexcept
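
Note on print_fields: the patch instruments ggml_compute_forward_mul_mat with
print_fields(...) calls and registers the ggml/llama structs through refl-cpp's
REFL_TYPE/REFL_FIELD/REFL_END macros, but the definition of print_fields lives
outside this diff. A minimal sketch of how such a helper can be built on top of
refl-cpp follows; demo_tensor and this print_fields body are illustrative
assumptions, not the repository's actual code.

    // Sketch only: walk the member list that REFL_TYPE/REFL_FIELD generate
    // and stream each "name = value" pair. Assumes the single-header refl-cpp.
    #include <cstdint>
    #include <iostream>
    #include "refl.hpp"

    // Stand-in type; the patch registers ggml_tensor, llama_model, etc. instead.
    struct demo_tensor {
        int32_t      type;
        int64_t      n_dims;
        const char * name;
    };

    REFL_TYPE(demo_tensor)
        REFL_FIELD(type)
        REFL_FIELD(n_dims)
        REFL_FIELD(name)
    REFL_END

    template <typename T>
    void print_fields(const T & obj) {
        // refl::reflect(obj).members is a compile-time list of field
        // descriptors; member.name is the field name and member(obj)
        // reads that field's value from obj.
        refl::util::for_each(refl::reflect(obj).members, [&](auto member) {
            std::cout << member.name << " = " << member(obj) << "\n";
        });
    }

    int main() {
        demo_tensor t{0, 2, "demo"};
        print_fields(t); // prints: type = 0, n_dims = 2, name = demo
    }

Streaming a field this way only compiles when its type has an operator<<; for
pointer-heavy structs such as ggml_tensor, the real helper presumably
special-cases or skips non-printable members (the commented-out REFL_FIELD
lines marked "crash" above hint at exactly that kind of trouble).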