From ec2b03e5049c2e64dd7ff949414ba9c3f984ca0c Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Sat, 25 Nov 2023 20:06:00 -0500
Subject: [PATCH] now printing tensors

---
 binding.py                 |   4 +-
 examples/main/main.cpp     |   3 +-
 examples/server/server.cpp |   1 +
 ggml.cpp                   |  22 ++++-
 llama-internal.hpp         |   6 ++
 print.hpp                  | 162 +++++++++++++++++++++++++------------
 6 files changed, 140 insertions(+), 58 deletions(-)

diff --git a/binding.py b/binding.py
index 217dce684..dcf4c37ef 100644
--- a/binding.py
+++ b/binding.py
@@ -14,9 +14,9 @@ llvmLibPath = "/usr/lib/llvm-15/lib/"
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"
 
 fileList = [
-#    "ggml.cpp",
+    "ggml.cpp",
 #    "llama.cpp",
-    "examples/server/server.cpp",
+#    "examples/server/server.cpp",
 ]
 
 typeList = [
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 18d2d03c0..d3c6cb4f5 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 static llama_context ** g_ctx;
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index aace61c57..b202bafe0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include "llama-internal.hpp"
 #include "print.hpp"
 
 #ifndef SERVER_VERBOSE
diff --git a/ggml.cpp b/ggml.cpp
index f1a0e5358..804689931 100644
--- a/ggml.cpp
+++ b/ggml.cpp
@@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"
 
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -46,6 +47,12 @@
 void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();
+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"
+
 #if defined(_WIN32)
 #include <windows.h>
 
@@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-
+    print_fields(*params);
+    print_fields(*src0);
+    print_fields(*src1);
+    print_fields(*dst);
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -9456,6 +9466,7 @@
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         }
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9463,10 +9474,12 @@
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         if (params->ith != 0) {
+            print_fields(*dst);
             return;
         }
 
         if (params->type == GGML_TASK_INIT) {
+
             return;
         }
 
@@ -9508,7 +9521,7 @@
         }
 
         //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9527,11 +9540,12 @@
                 }
             }
         }
-
+        print_fields(*dst);
         return;
     }
 
     if (params->type == GGML_TASK_FINALIZE) {
+        print_fields(*dst);
         return;
     }
 
@@ -9565,6 +9579,7 @@
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
         sched_yield();
+
         return;
     }
 
@@ -9617,6 +9632,7 @@
             }
         }
     }
+    print_fields(*dst);
 }
 
 // ggml_compute_forward_out_prod
diff --git a/llama-internal.hpp b/llama-internal.hpp
index 33cf39e5d..4c054677f 100644
--- a/llama-internal.hpp
+++ b/llama-internal.hpp
@@ -1,5 +1,10 @@
 #include
 #include
+#include
+#include
+#include
+#include
+#include
 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_FALCON,
@@ -451,6 +456,7 @@ struct llama_model {
     }
 };
 
+struct ggml_allocr;
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context();
diff --git a/print.hpp b/print.hpp
index 94688a1dc..2ab13c21b 100644
--- a/print.hpp
+++ b/print.hpp
@@ -1,8 +1,9 @@
 #include
 #include
 #include "llama.h"
-#include "ggml-internal.hpp"
-#include "llama-internal.hpp"
+#include "common/common.h"
+//#include "ggml-internal.hpp"
+//#include "llama-internal.hpp"
 
 REFL_TYPE(ggml_init_params )
 REFL_END
@@ -96,9 +97,10 @@ REFL_END
 REFL_TYPE(llama_sampling_params)
 REFL_END
 
+#ifdef llm_arch
 REFL_TYPE(llm_arch)
 REFL_END
-
+#endif
 REFL_TYPE(llama_sampling_context )
 REFL_FIELD( params)
 REFL_FIELD( mirostat_mu)
@@ -125,10 +127,34 @@ REFL_END
 
 REFL_TYPE(ggml_tensor)
     REFL_FIELD(type)
+    REFL_FIELD(type)
+    REFL_FIELD(backend)
+    REFL_FIELD(buffer)
+    REFL_FIELD(n_dims)
+    REFL_FIELD(ne)
+    REFL_FIELD(nb)
+    REFL_FIELD(op)
+    REFL_FIELD(op_params)
+    REFL_FIELD(is_param)
+    REFL_FIELD(grad)
+    REFL_FIELD(src)
+    REFL_FIELD(perf_runs)
+    REFL_FIELD(perf_cycles)
+    REFL_FIELD(perf_time_us)
+    REFL_FIELD(view_src)
+    REFL_FIELD(view_offs)
+    REFL_FIELD(data)
+    REFL_FIELD(name)
+    REFL_FIELD(extra)
+    REFL_FIELD(padding)
 REFL_END
 
 REFL_TYPE(ggml_cplan)
     REFL_FIELD(work_size)
+    REFL_FIELD(work_data)
+    REFL_FIELD(n_threads)
+    REFL_FIELD(abort_callback)
+    REFL_FIELD(abort_callback_data)
 REFL_END
 
 REFL_TYPE(ggml_hash_set)
@@ -137,14 +163,32 @@ REFL_END
 
 REFL_TYPE(ggml_cgraph)
     REFL_FIELD(size)
+    REFL_FIELD(n_nodes)
+    REFL_FIELD(n_leafs)
+    REFL_FIELD(nodes)
+    REFL_FIELD(grads)
+    REFL_FIELD(leafs)
+    REFL_FIELD(visited_hash_table)
+    REFL_FIELD(order)
+    REFL_FIELD(perf_runs)
+    REFL_FIELD(perf_cycles)
+    REFL_FIELD(perf_time_us)
 REFL_END
 
 REFL_TYPE(ggml_scratch)
     REFL_FIELD(offs)
+    REFL_FIELD(size)
+    REFL_FIELD(data)
+
 REFL_END
 
 REFL_TYPE(ggml_compute_params)
     REFL_FIELD(type)
+    REFL_FIELD(ith)
+    REFL_FIELD(nth)
+    REFL_FIELD(wsize)
+    REFL_FIELD(wdata)
+
 REFL_END
 
 REFL_TYPE(ggml_opt_params)
@@ -162,6 +206,7 @@ REFL_TYPE(ggml_something)
 REFL_FIELD(type_name)
 REFL_END
 
+#ifdef ggml_context
 REFL_TYPE(ggml_context)
 REFL_FIELD(mem_size)
 REFL_FIELD(mem_buffer)
@@ -173,14 +218,17 @@ REFL_FIELD( objects_begin)
 REFL_FIELD( objects_end)
 REFL_FIELD( scratch)
 REFL_FIELD( scratch_save)
-
 REFL_END
+#endif
 
+#ifdef ggml_context_container
 REFL_TYPE(ggml_context_container)
 REFL_FIELD(used)
 REFL_FIELD(context)
 REFL_END
+#endif
 
+#ifdef ggml_numa_node
 REFL_TYPE(ggml_numa_node)
 REFL_FIELD(cpus)
 REFL_FIELD(n_cpus)
@@ -220,6 +268,7 @@ REFL_TYPE(hash_map)
 REFL_FIELD(set)
 REFL_FIELD(vals)
 REFL_END
+
 REFL_TYPE(ggml_compute_state_shared)
 REFL_FIELD(cgraph)
 REFL_FIELD(cplan)
@@ -232,12 +281,15 @@ REFL_TYPE(ggml_lbfgs_iteration_data)
 REFL_FIELD(alpha)
 REFL_FIELD(ys)
 REFL_END
+#endif
 
+#ifdef gguf_kv
 REFL_TYPE(gguf_kv)
 REFL_FIELD(key)
 REFL_FIELD(type)
 REFL_END
+
 
 REFL_TYPE(gguf_header)
 REFL_FIELD(magic)
 REFL_FIELD(version)
@@ -257,7 +309,7 @@ REFL_TYPE(gguf_buf)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
-
+#endif
 
 REFL_TYPE(llama_model_params)
 REFL_FIELD(n_gpu_layers)
@@ -265,6 +317,7 @@ REFL_END
 REFL_TYPE(llama_context_params)
 REFL_FIELD(seed)
 REFL_END
+
 REFL_TYPE(llama_model_quantize_params)
 REFL_FIELD(nthread)
 REFL_END
@@ -282,13 +335,16 @@ REFL_END
 REFL_TYPE(llama_beams_state)
 REFL_FIELD(beam_views)
 REFL_END
-
+
+#ifdef ggml_backend
 REFL_TYPE(ggml_backend)
 REFL_END
+#endif
 
 REFL_TYPE(ggml_backend_buffer)
 REFL_END
 
+#ifdef ggml_allocr
 REFL_TYPE(ggml_allocr)
 REFL_END
 
@@ -298,13 +354,14 @@ REFL_END
 
 REFL_TYPE(ggml_gallocr)
 REFL_END
+#endif
 
+#ifdef llama_buffer
 REFL_TYPE(llama_buffer)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
-
 REFL_TYPE(llama_file)
 REFL_FIELD(fp)
 REFL_FIELD(size)
 
@@ -353,91 +410,92 @@ REFL_TYPE(llama_kv_cache)
 REFL_FIELD(has_shift)
 REFL_FIELD(head)
 REFL_END
+#endif
 
+#ifdef e_model
 REFL_TYPE(e_model)
 REFL_END
+#endif
 
 REFL_TYPE(llama_ftype)
 REFL_END
 
+//#ifdef llama_model
 REFL_TYPE(llama_model)
-    REFL_FIELD(type)
-    REFL_FIELD(arch)
+REFL_FIELD(type)
+REFL_FIELD(arch)
 REFL_FIELD(ftype )
-
-REFL_FIELD( name )
-
-    REFL_FIELD( hparams )
-REFL_FIELD( vocab)
-
-REFL_FIELD( tok_embd)
-REFL_FIELD( pos_embd)
-REFL_FIELD( tok_norm)
-REFL_FIELD( tok_norm_b)
-
-REFL_FIELD( output_norm)
-REFL_FIELD( output_norm_b)
-REFL_FIELD( output)
-
-REFL_FIELD( layers)
-
-REFL_FIELD( n_gpu_layers)
-
-    REFL_FIELD( gguf_kv) //unordered map
-    REFL_FIELD( ctx)
-    REFL_FIELD( buf)
-    REFL_FIELD( mapping) //std::unique_ptr
-REFL_FIELD( mlock_buf)
-REFL_FIELD( mlock_mmap)
-REFL_FIELD( tensors_by_name)
-    REFL_FIELD( t_load_us)
-REFL_FIELD( t_start_us)
-
+REFL_FIELD(name )
+REFL_FIELD(hparams )
+REFL_FIELD(vocab)
+REFL_FIELD(tok_embd)
+REFL_FIELD(pos_embd)
+REFL_FIELD(tok_norm)
+REFL_FIELD(tok_norm_b)
+REFL_FIELD(output_norm)
+REFL_FIELD(output_norm_b)
+REFL_FIELD(output)
+REFL_FIELD(layers)
+REFL_FIELD(n_gpu_layers)
+REFL_FIELD(gguf_kv) //unordered map
+REFL_FIELD(ctx)
+REFL_FIELD(buf)
+REFL_FIELD(mapping) //std::unique_ptr
+REFL_FIELD(mlock_buf)
+REFL_FIELD(mlock_mmap)
+REFL_FIELD(tensors_by_name)
+REFL_FIELD(t_load_us)
+REFL_FIELD(t_start_us)
 REFL_END
+//#endif
 
+#ifdef llama_vocab
 REFL_TYPE(llama_vocab)
 REFL_END
-
-    REFL_TYPE(grammar_parser::parse_state)
-    REFL_END
-
+#endif
+
+REFL_TYPE(grammar_parser::parse_state)
+REFL_END
+
+//#ifdef llama_context
 REFL_TYPE(llama_context)
 REFL_FIELD( cparams)
 //REFL_FIELD(model)
 REFL_FIELD(kv_self)
-    REFL_FIELD(rng) //random numbers
+REFL_FIELD(rng) //random numbers
 REFL_FIELD(has_evaluated_once )
 REFL_FIELD( t_start_us)
 REFL_FIELD( t_load_us)
-    REFL_FIELD( t_sample_us )
+REFL_FIELD( t_sample_us )
 REFL_FIELD( t_p_eval_us )
-    REFL_FIELD( t_eval_us)
+REFL_FIELD( t_eval_us)
 REFL_FIELD( n_sample )
 REFL_FIELD( n_p_eval )
-    REFL_FIELD( n_eval )
+REFL_FIELD( n_eval )
 //REFL_FIELD( logits) crash
 REFL_FIELD( logits_all )
 REFL_FIELD( embedding)
 //REFL_FIELD( work_buffer)
-    REFL_FIELD( buf_compute)
-    REFL_FIELD( buf_alloc)
+REFL_FIELD( buf_compute)
+REFL_FIELD( buf_alloc)
 REFL_FIELD( alloc )
-
 #ifdef GGML_USE_METAL
 REFL_FIELD( ctx_metal )
 #endif
-
 #ifdef GGML_USE_MPI
 REFL_FIELD( ctx_mpi )
-
 #endif
 REFL_END
+//#endif
 
+#ifdef llama_model_loader
 REFL_TYPE(llama_model_loader)
 REFL_FIELD(n_kv)
 REFL_FIELD(n_tensors)
 REFL_END
+#endif
 
+#ifdef llm_build_context
 REFL_TYPE(llm_build_context)
 // REFL_FIELD(model) cannot create pointer to reference member ‘llm_build_context::model’
 // REFL_FIELD(hparams) cannot create pointer to reference member ‘llm_build_context::hparams’
@@ -511,7 +569,6 @@ REFL_FIELD( n_feed_forward_w2 )
 REFL_FIELD( i_feed_forward_w2 )
 REFL_FIELD( n_k_quantized )
 REFL_FIELD( n_fallback )
-
 REFL_END
 
 REFL_TYPE(llama_data_context)
@@ -524,6 +581,7 @@ REFL_END
 REFL_TYPE(llama_data_file_context)
 REFL_FIELD(file)
 REFL_END
+#endif
 
 template <typename T>
 constexpr auto get_value_type_name(const T t) noexcept
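
Note on print_fields: the patch instruments ggml_compute_forward_mul_mat with
print_fields(...) calls and registers the ggml/llama structs through refl-cpp's
REFL_TYPE/REFL_FIELD/REFL_END macros, but the definition of print_fields lives
outside this diff. A minimal sketch of how such a helper can be built on top of
refl-cpp follows; demo_tensor and this print_fields body are illustrative
assumptions, not the repository's actual code.

    // Sketch only: walk the member list that REFL_TYPE/REFL_FIELD generate
    // and stream each "name = value" pair. Assumes the single-header refl-cpp.
    #include <cstdint>
    #include <iostream>
    #include "refl.hpp"

    // Stand-in type; the patch registers ggml_tensor, llama_model, etc. instead.
    struct demo_tensor {
        int32_t      type;
        int64_t      n_dims;
        const char * name;
    };

    REFL_TYPE(demo_tensor)
        REFL_FIELD(type)
        REFL_FIELD(n_dims)
        REFL_FIELD(name)
    REFL_END

    template <typename T>
    void print_fields(const T & obj) {
        // refl::reflect(obj).members is a compile-time list of field
        // descriptors; member.name is the field name and member(obj)
        // reads that field's value from obj.
        refl::util::for_each(refl::reflect(obj).members, [&](auto member) {
            std::cout << member.name << " = " << member(obj) << "\n";
        });
    }

    int main() {
        demo_tensor t{0, 2, "demo"};
        print_fields(t); // prints: type = 0, n_dims = 2, name = demo
    }

Streaming a field this way only compiles when its type has an operator<<; for
pointer-heavy structs such as ggml_tensor, the real helper presumably
special-cases or skips non-printable members (the commented-out REFL_FIELD
lines marked "crash" above hint at exactly that kind of trouble).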