now printing tensors

Author: mike dupont
Date:   2023-11-25 20:06:00 -05:00
Parent: af698c6f27
Commit: ec2b03e504

6 changed files with 140 additions and 58 deletions

View file

@@ -14,9 +14,9 @@ llvmLibPath = "/usr/lib/llvm-15/lib/"
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"

 fileList = [
-    # "ggml.cpp",
+    "ggml.cpp",
     # "llama.cpp",
-    "examples/server/server.cpp",
+    # "examples/server/server.cpp",
 ]

 typeList = [

View file

@@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"

 static llama_context ** g_ctx;

View file

@@ -24,6 +24,7 @@
 #include <thread>
 #include <mutex>
 #include <chrono>
+#include "llama-internal.hpp"
 #include "print.hpp"

 #ifndef SERVER_VERBOSE

View file

@@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -46,6 +47,12 @@ void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();
+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"

 #if defined(_WIN32)
 #include <windows.h>
@@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
+    print_fields(*params);
+    print_fields(*src0);
+    print_fields(*src1);
+    print_fields(*dst);
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
@ -9456,6 +9466,7 @@ static void ggml_compute_forward_mul_mat(
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) { if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize); ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
} }
print_fields(*dst);
return; return;
} }
#endif #endif
@@ -9463,10 +9474,12 @@ static void ggml_compute_forward_mul_mat(
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         if (params->ith != 0) {
+            print_fields(*dst);
             return;
         }

         if (params->type == GGML_TASK_INIT) {
             return;
         }
@@ -9508,7 +9521,7 @@ static void ggml_compute_forward_mul_mat(
         }

         //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9527,11 +9540,12 @@ static void ggml_compute_forward_mul_mat(
                 }
             }
         }
+        print_fields(*dst);
         return;
     }

     if (params->type == GGML_TASK_FINALIZE) {
+        print_fields(*dst);
         return;
     }
@@ -9565,6 +9579,7 @@ static void ggml_compute_forward_mul_mat(
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
         sched_yield();
         return;
     }
@@ -9617,6 +9632,7 @@ static void ggml_compute_forward_mul_mat(
             }
         }
     }
+    print_fields(*dst);
 }

 // ggml_compute_forward_out_prod
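The print_fields helper called above is declared in print.hpp, whose definition is not shown in this diff. Below is a minimal sketch of how such a helper can be written with refl-cpp against the REFL_TYPE/REFL_FIELD registrations added later in this commit; the signature, naming, and output format are assumptions, not the committed code.

// Hypothetical sketch, not the committed implementation: walk the members
// registered via REFL_TYPE/REFL_FIELD and stream "name = value" pairs.
#include <iostream>
#include <refl-cpp/refl.hpp>

template <typename T>
void print_fields(const T & obj) {
    refl::util::for_each(refl::reflect<T>().members, [&](auto member) {
        // field descriptors are callable: member(obj) yields the field value
        std::cout << refl::descriptor::get_display_name(member)
                  << " = " << member(obj) << "\n";
    });
}

With the ggml_tensor registration from print.hpp, a call such as print_fields(*dst) would dump the registered fields (type, ne, nb, op, name, data pointer, and so on) at each instrumented point in ggml_compute_forward_mul_mat.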

View file

@@ -1,5 +1,10 @@
 #include <set>
 #include <queue>
+#include <map>
+#include <random>
+#include <functional>
+#include <unordered_map>
+#include <memory>

 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_FALCON,
@@ -451,6 +456,7 @@ struct llama_model {
     }
 };

+struct ggml_allocr;
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context();
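The forward declaration added here is sufficient because llama_context only stores a pointer to ggml_allocr, so the full type is not needed in this header. A reduced illustration of that point (the struct name below is invented for the example; only the member mirrors the diff):

// Sketch only: an opaque forward declaration is enough for pointer members.
struct ggml_allocr;                        // full definition lives in ggml-alloc

struct llama_context_example {
    struct ggml_allocr * alloc = nullptr;  // compiles against the forward
                                           // declaration alone; the complete
                                           // type is needed only where alloc
                                           // is dereferenced
};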

View file

@@ -1,8 +1,9 @@
 #include <refl-cpp/refl.hpp>
 #include <iostream>
 #include "llama.h"
-#include "ggml-internal.hpp"
-#include "llama-internal.hpp"
+#include "common/common.h"
+//#include "ggml-internal.hpp"
+//#include "llama-internal.hpp"

 REFL_TYPE(ggml_init_params )
 REFL_END
@@ -96,9 +97,10 @@ REFL_END
 REFL_TYPE(llama_sampling_params)
 REFL_END

+#ifdef llm_arch
 REFL_TYPE(llm_arch)
 REFL_END
+#endif

 REFL_TYPE(llama_sampling_context )
 REFL_FIELD( params)
 REFL_FIELD( mirostat_mu)
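A note on the guard pattern introduced here and repeated below: llm_arch, ggml_context, gguf_kv and the other guard names are C++ type names, not preprocessor macros, so an #ifdef on them is false unless a macro of the same name is defined explicitly. The guarded REFL_TYPE blocks therefore compile out by default, which is presumably how print.hpp keeps building after the internal headers above were commented out. A small illustration of that assumption:

// By default the guard is false and the registration is skipped:
#ifdef llm_arch
REFL_TYPE(llm_arch)
REFL_END
#endif

// Defining the macro (e.g. in a translation unit that does include
// llama-internal.hpp) would turn the registration back on:
//   #define llm_arch 1
//   #include "print.hpp"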
@@ -125,10 +127,34 @@ REFL_END
 REFL_TYPE(ggml_tensor)
 REFL_FIELD(type)
+REFL_FIELD(type)
+REFL_FIELD(backend)
+REFL_FIELD(buffer)
+REFL_FIELD(n_dims)
+REFL_FIELD(ne)
+REFL_FIELD(nb)
+REFL_FIELD(op)
+REFL_FIELD(op_params)
+REFL_FIELD(is_param)
+REFL_FIELD(grad)
+REFL_FIELD(src)
+REFL_FIELD(perf_runs)
+REFL_FIELD(perf_cycles)
+REFL_FIELD(perf_time_us)
+REFL_FIELD(view_src)
+REFL_FIELD(view_offs)
+REFL_FIELD(data)
+REFL_FIELD(name)
+REFL_FIELD(extra)
+REFL_FIELD(padding)
 REFL_END

 REFL_TYPE(ggml_cplan)
 REFL_FIELD(work_size)
+REFL_FIELD(work_data)
+REFL_FIELD(n_threads)
+REFL_FIELD(abort_callback)
+REFL_FIELD(abort_callback_data)
 REFL_END

 REFL_TYPE(ggml_hash_set)
@@ -137,14 +163,32 @@ REFL_END
 REFL_TYPE(ggml_cgraph)
 REFL_FIELD(size)
+REFL_FIELD(n_nodes)
+REFL_FIELD(n_leafs)
+REFL_FIELD(nodes)
+REFL_FIELD(grads)
+REFL_FIELD(leafs)
+REFL_FIELD(visited_hash_table)
+REFL_FIELD(order)
+REFL_FIELD(perf_runs)
+REFL_FIELD(perf_cycles)
+REFL_FIELD(perf_time_us)
 REFL_END

 REFL_TYPE(ggml_scratch)
 REFL_FIELD(offs)
+REFL_FIELD(size)
+REFL_FIELD(data)
 REFL_END

 REFL_TYPE(ggml_compute_params)
 REFL_FIELD(type)
+REFL_FIELD(ith)
+REFL_FIELD(nth)
+REFL_FIELD(wsize)
+REFL_FIELD(wdata)
 REFL_END

 REFL_TYPE(ggml_opt_params)
@@ -162,6 +206,7 @@ REFL_TYPE(ggml_something)
 REFL_FIELD(type_name)
 REFL_END

+#ifdef ggml_context
 REFL_TYPE(ggml_context)
 REFL_FIELD(mem_size)
 REFL_FIELD(mem_buffer)
@@ -173,14 +218,17 @@ REFL_FIELD( objects_begin)
 REFL_FIELD( objects_end)
 REFL_FIELD( scratch)
 REFL_FIELD( scratch_save)
 REFL_END
+#endif

+#ifdef ggml_context_container
 REFL_TYPE(ggml_context_container)
 REFL_FIELD(used)
 REFL_FIELD(context)
 REFL_END
+#endif

+#ifdef ggml_numa_node
 REFL_TYPE(ggml_numa_node)
 REFL_FIELD(cpus)
 REFL_FIELD(n_cpus)
@@ -220,6 +268,7 @@ REFL_TYPE(hash_map)
 REFL_FIELD(set)
 REFL_FIELD(vals)
 REFL_END

 REFL_TYPE(ggml_compute_state_shared)
 REFL_FIELD(cgraph)
 REFL_FIELD(cplan)
@@ -232,12 +281,15 @@ REFL_TYPE(ggml_lbfgs_iteration_data)
 REFL_FIELD(alpha)
 REFL_FIELD(ys)
 REFL_END
+#endif

+#ifdef gguf_kv
 REFL_TYPE(gguf_kv)
 REFL_FIELD(key)
 REFL_FIELD(type)
 REFL_END

 REFL_TYPE(gguf_header)
 REFL_FIELD(magic)
 REFL_FIELD(version)
@@ -257,7 +309,7 @@ REFL_TYPE(gguf_buf)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
+#endif

 REFL_TYPE(llama_model_params)
 REFL_FIELD(n_gpu_layers)
@@ -265,6 +317,7 @@ REFL_END
 REFL_TYPE(llama_context_params)
 REFL_FIELD(seed)
 REFL_END

 REFL_TYPE(llama_model_quantize_params)
 REFL_FIELD(nthread)
 REFL_END
@@ -283,12 +336,15 @@ REFL_TYPE(llama_beams_state)
 REFL_FIELD(beam_views)
 REFL_END

+#ifdef ggml_backend
 REFL_TYPE(ggml_backend)
 REFL_END
+#endif

 REFL_TYPE(ggml_backend_buffer)
 REFL_END

+#ifdef ggml_allocr
 REFL_TYPE(ggml_allocr)
 REFL_END
@@ -298,13 +354,14 @@ REFL_END
 REFL_TYPE(ggml_gallocr)
 REFL_END
+#endif

+#ifdef llama_buffer
 REFL_TYPE(llama_buffer)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END

 REFL_TYPE(llama_file)
 REFL_FIELD(fp)
 REFL_FIELD(size)
@@ -353,36 +410,33 @@ REFL_TYPE(llama_kv_cache)
 REFL_FIELD(has_shift)
 REFL_FIELD(head)
 REFL_END
+#endif

+#ifdef e_model
 REFL_TYPE(e_model)
 REFL_END
+#endif

 REFL_TYPE(llama_ftype)
 REFL_END

+//#ifdef llama_model
 REFL_TYPE(llama_model)
 REFL_FIELD(type)
 REFL_FIELD(arch)
 REFL_FIELD(ftype )
 REFL_FIELD(name )
 REFL_FIELD(hparams )
 REFL_FIELD(vocab)
 REFL_FIELD(tok_embd)
 REFL_FIELD(pos_embd)
 REFL_FIELD(tok_norm)
 REFL_FIELD(tok_norm_b)
 REFL_FIELD(output_norm)
 REFL_FIELD(output_norm_b)
 REFL_FIELD(output)
 REFL_FIELD(layers)
 REFL_FIELD(n_gpu_layers)
 REFL_FIELD(gguf_kv) //unordered map
 REFL_FIELD(ctx)
 REFL_FIELD(buf)
@@ -392,15 +446,18 @@ REFL_FIELD( mlock_mmap)
 REFL_FIELD(tensors_by_name)
 REFL_FIELD(t_load_us)
 REFL_FIELD(t_start_us)
 REFL_END
+//#endif

+#ifdef llama_vocab
 REFL_TYPE(llama_vocab)
 REFL_END
+#endif

 REFL_TYPE(grammar_parser::parse_state)
 REFL_END

+//#ifdef llama_context
 REFL_TYPE(llama_context)
 REFL_FIELD( cparams)
 //REFL_FIELD(model)
@@ -422,22 +479,23 @@ REFL_FIELD( embedding)
 REFL_FIELD( buf_compute)
 REFL_FIELD( buf_alloc)
 REFL_FIELD( alloc )

 #ifdef GGML_USE_METAL
 REFL_FIELD( ctx_metal )
 #endif

 #ifdef GGML_USE_MPI
 REFL_FIELD( ctx_mpi )
 #endif
 REFL_END
+//#endif

+#ifdef llama_model_loader
 REFL_TYPE(llama_model_loader)
 REFL_FIELD(n_kv)
 REFL_FIELD(n_tensors)
 REFL_END
+#endif

+#ifdef llm_build_context
 REFL_TYPE(llm_build_context)
 // REFL_FIELD(model) cannot create pointer to reference member llm_build_context::model
 // REFL_FIELD(hparams) cannot create pointer to reference member llm_build_context::hparams
@@ -511,7 +569,6 @@ REFL_FIELD( n_feed_forward_w2 )
 REFL_FIELD( i_feed_forward_w2 )
 REFL_FIELD( n_k_quantized )
 REFL_FIELD( n_fallback )
 REFL_END

 REFL_TYPE(llama_data_context)
@@ -524,6 +581,7 @@ REFL_END
 REFL_TYPE(llama_data_file_context)
 REFL_FIELD(file)
 REFL_END
+#endif

 template <typename T>
 constexpr auto get_value_type_name(const T t) noexcept
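For reference, a self-contained sketch of the registration-plus-printing pattern this header implements. The struct and its values are invented for the example; only the REFL_TYPE/REFL_FIELD/REFL_END usage and the member traversal mirror the file.

#include <iostream>
#include <refl-cpp/refl.hpp>

// Hypothetical type, used only to demonstrate the pattern.
struct demo_params {
    int   n_threads;
    float temperature;
};

REFL_TYPE(demo_params)
REFL_FIELD(n_threads)
REFL_FIELD(temperature)
REFL_END

int main() {
    demo_params p{8, 0.7f};
    // Same traversal a print_fields-style helper would perform.
    refl::util::for_each(refl::reflect<demo_params>().members, [&](auto member) {
        std::cout << refl::descriptor::get_display_name(member)
                  << " = " << member(p) << "\n";
    });
}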