Print tensor fields in ggml_compute_forward_mul_mat via print_fields

2023-11-25 20:06:00 -05:00 · 2023-11-25 20:06:00 -05:00 · ec2b03e504
commit ec2b03e504
parent af698c6f27
6 changed files with 140 additions and 58 deletions
--- a/binding.py
+++ b/binding.py
@ -14,9 +14,9 @@ llvmLibPath = "/usr/lib/llvm-15/lib/"
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"

 fileList = [
-#    "ggml.cpp",
+    "ggml.cpp",
 #    "llama.cpp",
-    "examples/server/server.cpp",
+#    "examples/server/server.cpp",
 ]

 typeList = [
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"

 static llama_context           ** g_ctx;
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -24,6 +24,7 @@
 #include <thread>
 #include <mutex>
 #include <chrono>
+#include "llama-internal.hpp"
 #include "print.hpp"

 #ifndef SERVER_VERBOSE
--- a/ggml.cpp
+++ b/ggml.cpp
@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"

+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@ -46,6 +47,12 @@ void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();

+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"
+
 #if defined(_WIN32)

 #include <windows.h>
@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
              struct ggml_tensor * dst) {
-
+  print_fields(*params);
+  print_fields(*src0);
+  print_fields(*src1);
+  print_fields(*dst);
    int64_t t0 = ggml_perf_time_us();
    UNUSED(t0);

@ -9456,6 +9466,7 @@ static void ggml_compute_forward_mul_mat(
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
        }
+	print_fields(*dst);
        return;
    }
 #endif
@ -9463,10 +9474,12 @@ static void ggml_compute_forward_mul_mat(
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
    if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
        if (params->ith != 0) {
+	  print_fields(*dst);
            return;
        }

        if (params->type == GGML_TASK_INIT) {
+	  
            return;
        }

@ -9508,7 +9521,7 @@ static void ggml_compute_forward_mul_mat(
        }

        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
+	print_fields(*dst);
        return;
    }
 #endif
@ -9527,11 +9540,12 @@ static void ggml_compute_forward_mul_mat(
                }
            }
        }
-
+	print_fields(*dst);
        return;
    }

    if (params->type == GGML_TASK_FINALIZE) {
+      print_fields(*dst);
        return;
    }

@ -9565,6 +9579,7 @@ static void ggml_compute_forward_mul_mat(
    // threads with no work simply yield (not sure if it helps)
    if (ir010 >= ir011 || ir110 >= ir111) {
        sched_yield();
+	
        return;
    }

@ -9617,6 +9632,7 @@ static void ggml_compute_forward_mul_mat(
            }
        }
    }
+    print_fields(*dst);
 }

 // ggml_compute_forward_out_prod
--- a/llama-internal.hpp
+++ b/llama-internal.hpp
@ -1,5 +1,10 @@
 #include <set>
 #include <queue>
+#include <map>
+#include <random>
+#include <functional>
+#include <unordered_map>
+#include <memory>
 enum llm_arch {
    LLM_ARCH_LLAMA,
    LLM_ARCH_FALCON,
@ -451,6 +456,7 @@ struct llama_model {
    }
 };

+struct ggml_allocr;
 struct llama_context {
    llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
  ~llama_context();
--- a/print.hpp
+++ b/print.hpp
@ -1,8 +1,9 @@
 #include <refl-cpp/refl.hpp>
 #include <iostream>
 #include "llama.h"
-#include "ggml-internal.hpp"
-#include "llama-internal.hpp"
+#include "common/common.h"
+//#include "ggml-internal.hpp"
+//#include "llama-internal.hpp"

 REFL_TYPE(ggml_init_params )
 REFL_END
@ -96,9 +97,10 @@ REFL_END
 REFL_TYPE(llama_sampling_params)
 REFL_END

+#ifdef llm_arch
 REFL_TYPE(llm_arch)
 REFL_END
-
+#endif
 REFL_TYPE(llama_sampling_context )
 REFL_FIELD( params)
 REFL_FIELD( mirostat_mu)
@ -125,10 +127,34 @@ REFL_END

 REFL_TYPE(ggml_tensor)
  REFL_FIELD(type)
+  // 'type' is already registered on the line above; only the remaining members are added here.
+  REFL_FIELD(backend)
+  REFL_FIELD(buffer)
+  REFL_FIELD(n_dims)
+  REFL_FIELD(ne)
+  REFL_FIELD(nb)
+  REFL_FIELD(op)
+  REFL_FIELD(op_params)
+  REFL_FIELD(is_param)
+  REFL_FIELD(grad)
+  REFL_FIELD(src)
+  REFL_FIELD(perf_runs)
+  REFL_FIELD(perf_cycles)
+  REFL_FIELD(perf_time_us)
+  REFL_FIELD(view_src)
+  REFL_FIELD(view_offs)
+  REFL_FIELD(data)
+  REFL_FIELD(name)
+  REFL_FIELD(extra)
+  REFL_FIELD(padding)
 REFL_END

 REFL_TYPE(ggml_cplan)
  REFL_FIELD(work_size)
+  REFL_FIELD(work_data)
+  REFL_FIELD(n_threads)
+  REFL_FIELD(abort_callback)
+  REFL_FIELD(abort_callback_data)
 REFL_END

 REFL_TYPE(ggml_hash_set)
@ -137,14 +163,32 @@ REFL_END

 REFL_TYPE(ggml_cgraph)
  REFL_FIELD(size)
+  REFL_FIELD(n_nodes)
+  REFL_FIELD(n_leafs)
+  REFL_FIELD(nodes)
+  REFL_FIELD(grads)
+  REFL_FIELD(leafs)
+  REFL_FIELD(visited_hash_table)
+  REFL_FIELD(order)
+  REFL_FIELD(perf_runs)
+  REFL_FIELD(perf_cycles)
+  REFL_FIELD(perf_time_us)
 REFL_END

 REFL_TYPE(ggml_scratch)
  REFL_FIELD(offs)
+  REFL_FIELD(size)
+  REFL_FIELD(data)
+
 REFL_END

 REFL_TYPE(ggml_compute_params)
  REFL_FIELD(type)
+  REFL_FIELD(ith)
+  REFL_FIELD(nth)
+  REFL_FIELD(wsize)
+  REFL_FIELD(wdata)
+
 REFL_END

 REFL_TYPE(ggml_opt_params)
@ -162,6 +206,7 @@ REFL_TYPE(ggml_something)
  REFL_FIELD(type_name)
 REFL_END

+#ifdef ggml_context
 REFL_TYPE(ggml_context)
  REFL_FIELD(mem_size)
 REFL_FIELD(mem_buffer)
@ -173,14 +218,17 @@ REFL_FIELD(    objects_begin)
 REFL_FIELD(    objects_end)
 REFL_FIELD(    scratch)
 REFL_FIELD(    scratch_save)
-
 REFL_END
+#endif

+#ifdef ggml_context_container
 REFL_TYPE(ggml_context_container)
  REFL_FIELD(used)
  REFL_FIELD(context)
 REFL_END
+#endif

+#ifdef ggml_numa_node
 REFL_TYPE(ggml_numa_node)
   REFL_FIELD(cpus)
   REFL_FIELD(n_cpus)
@ -220,6 +268,7 @@ REFL_TYPE(hash_map)
  REFL_FIELD(set)
  REFL_FIELD(vals)
 REFL_END
+
 REFL_TYPE(ggml_compute_state_shared)
  REFL_FIELD(cgraph)
  REFL_FIELD(cplan)
@ -232,12 +281,15 @@ REFL_TYPE(ggml_lbfgs_iteration_data)
  REFL_FIELD(alpha)
  REFL_FIELD(ys)
 REFL_END
+#endif

+#ifdef gguf_kv
 REFL_TYPE(gguf_kv)
  REFL_FIELD(key)
  REFL_FIELD(type)
 REFL_END

+
 REFL_TYPE(gguf_header)
  REFL_FIELD(magic)
  REFL_FIELD(version)
@ -257,7 +309,7 @@ REFL_TYPE(gguf_buf)
  REFL_FIELD(data)
  REFL_FIELD(size)
 REFL_END
-
+#endif

 REFL_TYPE(llama_model_params)
  REFL_FIELD(n_gpu_layers)
@ -265,6 +317,7 @@ REFL_END
 REFL_TYPE(llama_context_params)
  REFL_FIELD(seed)
 REFL_END
+
 REFL_TYPE(llama_model_quantize_params)
  REFL_FIELD(nthread)
 REFL_END
@ -283,12 +336,15 @@ REFL_TYPE(llama_beams_state)
  REFL_FIELD(beam_views)
 REFL_END

+#ifdef ggml_backend
 REFL_TYPE(ggml_backend)
 REFL_END
+#endif

 REFL_TYPE(ggml_backend_buffer)
 REFL_END

+#ifdef ggml_allocr
 REFL_TYPE(ggml_allocr)
 REFL_END

@ -298,13 +354,14 @@ REFL_END
 REFL_TYPE(ggml_gallocr)
 REFL_END

+#endif

+#ifdef llama_buffer
 REFL_TYPE(llama_buffer)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
  
-
 REFL_TYPE(llama_file)
 REFL_FIELD(fp)
 REFL_FIELD(size)
@ -353,91 +410,92 @@ REFL_TYPE(llama_kv_cache)
   REFL_FIELD(has_shift)
   REFL_FIELD(head)
 REFL_END
+#endif

+#ifdef e_model
 REFL_TYPE(e_model)
 REFL_END
+#endif

 REFL_TYPE(llama_ftype)
 REFL_END

+//#ifdef llama_model
 REFL_TYPE(llama_model)
-  REFL_FIELD(type)
-  REFL_FIELD(arch)
+REFL_FIELD(type)
+REFL_FIELD(arch)
 REFL_FIELD(ftype )
-
-REFL_FIELD(  name )
-
-  REFL_FIELD(   hparams )
-REFL_FIELD(    vocab)
-
-REFL_FIELD(   tok_embd)
-REFL_FIELD(   pos_embd)
-REFL_FIELD(   tok_norm)
-REFL_FIELD(   tok_norm_b)
-
-REFL_FIELD(   output_norm)
-REFL_FIELD(  output_norm_b)
-REFL_FIELD(  output)
-
-REFL_FIELD(  layers)
-
-REFL_FIELD(  n_gpu_layers)
-
-  REFL_FIELD(  gguf_kv) //unordered map
-  REFL_FIELD( ctx)
-  REFL_FIELD( buf)
- REFL_FIELD( mapping) //std::unique_ptr 
-REFL_FIELD( mlock_buf)
-REFL_FIELD( mlock_mmap)
-REFL_FIELD( tensors_by_name)
-  REFL_FIELD( t_load_us)
-REFL_FIELD( t_start_us)
-
+REFL_FIELD(name )
+REFL_FIELD(hparams )
+REFL_FIELD(vocab)
+REFL_FIELD(tok_embd)
+REFL_FIELD(pos_embd)
+REFL_FIELD(tok_norm)
+REFL_FIELD(tok_norm_b)
+REFL_FIELD(output_norm)
+REFL_FIELD(output_norm_b)
+REFL_FIELD(output)
+REFL_FIELD(layers)
+REFL_FIELD(n_gpu_layers)
+REFL_FIELD(gguf_kv) //unordered map
+REFL_FIELD(ctx)
+REFL_FIELD(buf)
+REFL_FIELD(mapping) //std::unique_ptr 
+REFL_FIELD(mlock_buf)
+REFL_FIELD(mlock_mmap)
+REFL_FIELD(tensors_by_name)
+REFL_FIELD(t_load_us)
+REFL_FIELD(t_start_us)
 REFL_END
+//#endif

+#ifdef llama_vocab
 REFL_TYPE(llama_vocab)
  REFL_END
+#endif

-  REFL_TYPE(grammar_parser::parse_state)
-  REFL_END
+REFL_TYPE(grammar_parser::parse_state)
+REFL_END

+//#ifdef llama_context
 REFL_TYPE(llama_context)
 REFL_FIELD( cparams)
 //REFL_FIELD(model)
 REFL_FIELD(kv_self)
- REFL_FIELD(rng) //random numbers
+REFL_FIELD(rng) //random numbers
 REFL_FIELD(has_evaluated_once )
 REFL_FIELD( t_start_us)
 REFL_FIELD( t_load_us)
-  REFL_FIELD( t_sample_us )
+REFL_FIELD( t_sample_us )
 REFL_FIELD( t_p_eval_us )
-  REFL_FIELD( t_eval_us)
+REFL_FIELD( t_eval_us)
 REFL_FIELD( n_sample )
 REFL_FIELD( n_p_eval )
-  REFL_FIELD( n_eval  )
+REFL_FIELD( n_eval  )
 //REFL_FIELD(  logits) crash
 REFL_FIELD(  logits_all )
 REFL_FIELD(  embedding)
 //REFL_FIELD(   work_buffer)
-  REFL_FIELD(   buf_compute)
-  REFL_FIELD( buf_alloc)
+REFL_FIELD(   buf_compute)
+REFL_FIELD( buf_alloc)
 REFL_FIELD( alloc ) 
-
 #ifdef GGML_USE_METAL
 REFL_FIELD( ctx_metal )
 #endif
-
 #ifdef GGML_USE_MPI
 REFL_FIELD( ctx_mpi )
-
 #endif
 REFL_END
+//#endif

+#ifdef llama_model_loader
 REFL_TYPE(llama_model_loader)
  REFL_FIELD(n_kv)
  REFL_FIELD(n_tensors)
 REFL_END
+#endif

+#ifdef llm_build_context
 REFL_TYPE(llm_build_context)
 // REFL_FIELD(model) cannot create pointer to reference member ‘llm_build_context::model’
 //  REFL_FIELD(hparams) cannot create pointer to reference member ‘llm_build_context::hparams’
@ -511,7 +569,6 @@ REFL_FIELD(    n_feed_forward_w2 )
  REFL_FIELD(    i_feed_forward_w2 )
 REFL_FIELD(    n_k_quantized     )
 REFL_FIELD(     n_fallback        )
-
 REFL_END

 REFL_TYPE(llama_data_context)
@ -524,6 +581,7 @@ REFL_END
 REFL_TYPE(llama_data_file_context)
  REFL_FIELD(file)
 REFL_END
+#endif

 template <typename T>
 constexpr auto get_value_type_name(const T t) noexcept