now printing tensors

Author: mike dupont
Date:   2023-11-25 20:06:00 -05:00
Parent: af698c6f27
Commit: ec2b03e504

6 changed files with 140 additions and 58 deletions

View file

@@ -14,9 +14,9 @@ llvmLibPath = "/usr/lib/llvm-15/lib/"
 cxxClientRoot = "/home/mdupont/experiments/llama.cpp/"

 fileList = [
-    # "ggml.cpp",
+    "ggml.cpp",
     # "llama.cpp",
-    "examples/server/server.cpp",
+    # "examples/server/server.cpp",
 ]

 typeList = [

View file

@@ -30,7 +30,8 @@
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
+#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
 #include "print.hpp"

 static llama_context ** g_ctx;

View file

@@ -24,6 +24,7 @@
 #include <thread>
 #include <mutex>
 #include <chrono>
+#include "llama-internal.hpp"
 #include "print.hpp"

 #ifndef SERVER_VERBOSE

View file

@@ -4,6 +4,7 @@
 #include "ggml-impl.h"
 #include "ggml-quants.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <malloc.h> // using malloc.h with MSC/MINGW
 #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
@@ -46,6 +47,12 @@ void type_traits_init();
 void GGUF_TYPE_SIZE_init();
 void GGUF_TYPE_NAME_init();
+#include "llama.h"
+struct ggml_allocr;
+//#include "ggml-internal.hpp"
+#include "llama-internal.hpp"
+#include "print.hpp"

 #if defined(_WIN32)
 #include <windows.h>
@@ -9412,7 +9419,10 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
+    print_fields(*params);
+    print_fields(*src0);
+    print_fields(*src1);
+    print_fields(*dst);
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
@ -9456,6 +9466,7 @@ static void ggml_compute_forward_mul_mat(
if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) { if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize); ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
} }
print_fields(*dst);
return; return;
} }
#endif #endif
@@ -9463,10 +9474,12 @@ static void ggml_compute_forward_mul_mat(
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) {
         if (params->ith != 0) {
+            print_fields(*dst);
             return;
         }

         if (params->type == GGML_TASK_INIT) {
             return;
         }
@@ -9508,7 +9521,7 @@ static void ggml_compute_forward_mul_mat(
         }

         //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
+        print_fields(*dst);
         return;
     }
 #endif
@@ -9527,11 +9540,12 @@ static void ggml_compute_forward_mul_mat(
                 }
             }
         }
+        print_fields(*dst);
         return;
     }

     if (params->type == GGML_TASK_FINALIZE) {
+        print_fields(*dst);
         return;
     }
@@ -9565,6 +9579,7 @@ static void ggml_compute_forward_mul_mat(
     // threads with no work simply yield (not sure if it helps)
     if (ir010 >= ir011 || ir110 >= ir111) {
         sched_yield();
         return;
     }
@@ -9617,6 +9632,7 @@ static void ggml_compute_forward_mul_mat(
             }
         }
     }
+    print_fields(*dst);
 }

 // ggml_compute_forward_out_prod
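The print_fields helper called above is declared in print.hpp, whose definition is not shown in this diff. Below is a minimal sketch of how such a helper can be written with refl-cpp against the REFL_TYPE/REFL_FIELD registrations added later in this commit; the signature, naming, and output format are assumptions, not the committed code.

// Hypothetical sketch, not the committed implementation: walk the members
// registered via REFL_TYPE/REFL_FIELD and stream "name = value" pairs.
#include <iostream>
#include <refl-cpp/refl.hpp>

template <typename T>
void print_fields(const T & obj) {
    refl::util::for_each(refl::reflect<T>().members, [&](auto member) {
        // field descriptors are callable: member(obj) yields the field value
        std::cout << refl::descriptor::get_display_name(member)
                  << " = " << member(obj) << "\n";
    });
}

With the ggml_tensor registration from print.hpp, a call such as print_fields(*dst) would dump the registered fields (type, ne, nb, op, name, data pointer, and so on) at each instrumented point in ggml_compute_forward_mul_mat.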

View file

@@ -1,5 +1,10 @@
 #include <set>
 #include <queue>
+#include <map>
+#include <random>
+#include <functional>
+#include <unordered_map>
+#include <memory>

 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_FALCON,
@@ -451,6 +456,7 @@ struct llama_model {
     }
 };

+struct ggml_allocr;
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context();
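The forward declaration added here is sufficient because llama_context only stores a pointer to ggml_allocr, so the full type is not needed in this header. A reduced illustration of that point (the struct name below is invented for the example; only the member mirrors the diff):

// Sketch only: an opaque forward declaration is enough for pointer members.
struct ggml_allocr;                        // full definition lives in ggml-alloc

struct llama_context_example {
    struct ggml_allocr * alloc = nullptr;  // compiles against the forward
                                           // declaration alone; the complete
                                           // type is needed only where alloc
                                           // is dereferenced
};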

View file

@@ -1,8 +1,9 @@
 #include <refl-cpp/refl.hpp>
 #include <iostream>
 #include "llama.h"
-#include "ggml-internal.hpp"
-#include "llama-internal.hpp"
+#include "common/common.h"
+//#include "ggml-internal.hpp"
+//#include "llama-internal.hpp"

 REFL_TYPE(ggml_init_params )
 REFL_END
@@ -96,9 +97,10 @@ REFL_END
 REFL_TYPE(llama_sampling_params)
 REFL_END

+#ifdef llm_arch
 REFL_TYPE(llm_arch)
 REFL_END
+#endif

 REFL_TYPE(llama_sampling_context )
 REFL_FIELD( params)
 REFL_FIELD( mirostat_mu)
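A note on the guard pattern introduced here and repeated below: llm_arch, ggml_context, gguf_kv and the other guard names are C++ type names, not preprocessor macros, so an #ifdef on them is false unless a macro of the same name is defined explicitly. The guarded REFL_TYPE blocks therefore compile out by default, which is presumably how print.hpp keeps building after the internal headers above were commented out. A small illustration of that assumption:

// By default the guard is false and the registration is skipped:
#ifdef llm_arch
REFL_TYPE(llm_arch)
REFL_END
#endif

// Defining the macro (e.g. in a translation unit that does include
// llama-internal.hpp) would turn the registration back on:
//   #define llm_arch 1
//   #include "print.hpp"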
@@ -125,10 +127,34 @@ REFL_END
 REFL_TYPE(ggml_tensor)
 REFL_FIELD(type)
+REFL_FIELD(type)
+REFL_FIELD(backend)
+REFL_FIELD(buffer)
+REFL_FIELD(n_dims)
+REFL_FIELD(ne)
+REFL_FIELD(nb)
+REFL_FIELD(op)
+REFL_FIELD(op_params)
+REFL_FIELD(is_param)
+REFL_FIELD(grad)
+REFL_FIELD(src)
+REFL_FIELD(perf_runs)
+REFL_FIELD(perf_cycles)
+REFL_FIELD(perf_time_us)
+REFL_FIELD(view_src)
+REFL_FIELD(view_offs)
+REFL_FIELD(data)
+REFL_FIELD(name)
+REFL_FIELD(extra)
+REFL_FIELD(padding)
 REFL_END

 REFL_TYPE(ggml_cplan)
 REFL_FIELD(work_size)
+REFL_FIELD(work_data)
+REFL_FIELD(n_threads)
+REFL_FIELD(abort_callback)
+REFL_FIELD(abort_callback_data)
 REFL_END

 REFL_TYPE(ggml_hash_set)
@@ -137,14 +163,32 @@ REFL_END
 REFL_TYPE(ggml_cgraph)
 REFL_FIELD(size)
+REFL_FIELD(n_nodes)
+REFL_FIELD(n_leafs)
+REFL_FIELD(nodes)
+REFL_FIELD(grads)
+REFL_FIELD(leafs)
+REFL_FIELD(visited_hash_table)
+REFL_FIELD(order)
+REFL_FIELD(perf_runs)
+REFL_FIELD(perf_cycles)
+REFL_FIELD(perf_time_us)
 REFL_END

 REFL_TYPE(ggml_scratch)
 REFL_FIELD(offs)
+REFL_FIELD(size)
+REFL_FIELD(data)
 REFL_END

 REFL_TYPE(ggml_compute_params)
 REFL_FIELD(type)
+REFL_FIELD(ith)
+REFL_FIELD(nth)
+REFL_FIELD(wsize)
+REFL_FIELD(wdata)
 REFL_END

 REFL_TYPE(ggml_opt_params)
@@ -162,6 +206,7 @@ REFL_TYPE(ggml_something)
 REFL_FIELD(type_name)
 REFL_END

+#ifdef ggml_context
 REFL_TYPE(ggml_context)
 REFL_FIELD(mem_size)
 REFL_FIELD(mem_buffer)
@@ -173,14 +218,17 @@ REFL_FIELD( objects_begin)
 REFL_FIELD( objects_end)
 REFL_FIELD( scratch)
 REFL_FIELD( scratch_save)
 REFL_END
+#endif

+#ifdef ggml_context_container
 REFL_TYPE(ggml_context_container)
 REFL_FIELD(used)
 REFL_FIELD(context)
 REFL_END
+#endif

+#ifdef ggml_numa_node
 REFL_TYPE(ggml_numa_node)
 REFL_FIELD(cpus)
 REFL_FIELD(n_cpus)
@@ -220,6 +268,7 @@ REFL_TYPE(hash_map)
 REFL_FIELD(set)
 REFL_FIELD(vals)
 REFL_END

 REFL_TYPE(ggml_compute_state_shared)
 REFL_FIELD(cgraph)
 REFL_FIELD(cplan)
@@ -232,12 +281,15 @@ REFL_TYPE(ggml_lbfgs_iteration_data)
 REFL_FIELD(alpha)
 REFL_FIELD(ys)
 REFL_END
+#endif

+#ifdef gguf_kv
 REFL_TYPE(gguf_kv)
 REFL_FIELD(key)
 REFL_FIELD(type)
 REFL_END

 REFL_TYPE(gguf_header)
 REFL_FIELD(magic)
 REFL_FIELD(version)
@@ -257,7 +309,7 @@ REFL_TYPE(gguf_buf)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END
+#endif

 REFL_TYPE(llama_model_params)
 REFL_FIELD(n_gpu_layers)
@@ -265,6 +317,7 @@ REFL_END
 REFL_TYPE(llama_context_params)
 REFL_FIELD(seed)
 REFL_END

 REFL_TYPE(llama_model_quantize_params)
 REFL_FIELD(nthread)
 REFL_END
@@ -283,12 +336,15 @@ REFL_TYPE(llama_beams_state)
 REFL_FIELD(beam_views)
 REFL_END

+#ifdef ggml_backend
 REFL_TYPE(ggml_backend)
 REFL_END
+#endif

 REFL_TYPE(ggml_backend_buffer)
 REFL_END

+#ifdef ggml_allocr
 REFL_TYPE(ggml_allocr)
 REFL_END
@@ -298,13 +354,14 @@ REFL_END
 REFL_TYPE(ggml_gallocr)
 REFL_END
+#endif

+#ifdef llama_buffer
 REFL_TYPE(llama_buffer)
 REFL_FIELD(data)
 REFL_FIELD(size)
 REFL_END

 REFL_TYPE(llama_file)
 REFL_FIELD(fp)
 REFL_FIELD(size)
@@ -353,36 +410,33 @@ REFL_TYPE(llama_kv_cache)
 REFL_FIELD(has_shift)
 REFL_FIELD(head)
 REFL_END
+#endif

+#ifdef e_model
 REFL_TYPE(e_model)
 REFL_END
+#endif

 REFL_TYPE(llama_ftype)
 REFL_END

+//#ifdef llama_model
 REFL_TYPE(llama_model)
 REFL_FIELD(type)
 REFL_FIELD(arch)
 REFL_FIELD(ftype )
 REFL_FIELD(name )
 REFL_FIELD(hparams )
 REFL_FIELD(vocab)
 REFL_FIELD(tok_embd)
 REFL_FIELD(pos_embd)
 REFL_FIELD(tok_norm)
 REFL_FIELD(tok_norm_b)
 REFL_FIELD(output_norm)
 REFL_FIELD(output_norm_b)
 REFL_FIELD(output)
 REFL_FIELD(layers)
 REFL_FIELD(n_gpu_layers)
 REFL_FIELD(gguf_kv) //unordered map
 REFL_FIELD(ctx)
 REFL_FIELD(buf)
@@ -392,15 +446,18 @@ REFL_FIELD( mlock_mmap)
 REFL_FIELD(tensors_by_name)
 REFL_FIELD(t_load_us)
 REFL_FIELD(t_start_us)
 REFL_END
+//#endif

+#ifdef llama_vocab
 REFL_TYPE(llama_vocab)
 REFL_END
+#endif

 REFL_TYPE(grammar_parser::parse_state)
 REFL_END

+//#ifdef llama_context
 REFL_TYPE(llama_context)
 REFL_FIELD( cparams)
 //REFL_FIELD(model)
@@ -422,22 +479,23 @@ REFL_FIELD( embedding)
 REFL_FIELD( buf_compute)
 REFL_FIELD( buf_alloc)
 REFL_FIELD( alloc )

 #ifdef GGML_USE_METAL
 REFL_FIELD( ctx_metal )
 #endif

 #ifdef GGML_USE_MPI
 REFL_FIELD( ctx_mpi )
 #endif
 REFL_END
+//#endif

+#ifdef llama_model_loader
 REFL_TYPE(llama_model_loader)
 REFL_FIELD(n_kv)
 REFL_FIELD(n_tensors)
 REFL_END
+#endif

+#ifdef llm_build_context
 REFL_TYPE(llm_build_context)
 // REFL_FIELD(model) cannot create pointer to reference member llm_build_context::model
 // REFL_FIELD(hparams) cannot create pointer to reference member llm_build_context::hparams
@@ -511,7 +569,6 @@ REFL_FIELD( n_feed_forward_w2 )
 REFL_FIELD( i_feed_forward_w2 )
 REFL_FIELD( n_k_quantized )
 REFL_FIELD( n_fallback )
 REFL_END

 REFL_TYPE(llama_data_context)
@@ -524,6 +581,7 @@ REFL_END
 REFL_TYPE(llama_data_file_context)
 REFL_FIELD(file)
 REFL_END
+#endif

 template <typename T>
 constexpr auto get_value_type_name(const T t) noexcept
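For reference, a self-contained sketch of the registration-plus-printing pattern this header implements. The struct and its values are invented for the example; only the REFL_TYPE/REFL_FIELD/REFL_END usage and the member traversal mirror the file.

#include <iostream>
#include <refl-cpp/refl.hpp>

// Hypothetical type, used only to demonstrate the pattern.
struct demo_params {
    int   n_threads;
    float temperature;
};

REFL_TYPE(demo_params)
REFL_FIELD(n_threads)
REFL_FIELD(temperature)
REFL_END

int main() {
    demo_params p{8, 0.7f};
    // Same traversal a print_fields-style helper would perform.
    refl::util::for_each(refl::reflect<demo_params>().members, [&](auto member) {
        std::cout << refl::descriptor::get_display_name(member)
                  << " = " << member(p) << "\n";
    });
}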