From df647db61179043798867dd214a927ffb5ffffc0 Mon Sep 17 00:00:00 2001 From: mike dupont Date: Thu, 23 Nov 2023 09:55:19 -0500 Subject: [PATCH] bindings --- Makefile | 3 + examples/baby-llama/baby-llama.cpp | 26 +- examples/benchmark/benchmark-matmult.cpp | 13 +- .../convert-llama2c-to-ggml.cpp | 10 +- examples/export-lora/export-lora.cpp | 8 +- examples/finetune/finetune.cpp | 57 ++- examples/llava/clip.cpp | 36 +- .../train-text-from-scratch.cpp | 57 ++- ggml-backend.cpp | 13 +- ggml.cpp | 30 +- ggml.h | 18 +- llama.cpp | 65 +-- llama.h | 10 +- print.hpp | 386 ++++++++++++++++-- tests/test-quantize-fns.cpp | 6 +- tests/test-quantize-perf.cpp | 6 +- tests/test-rope.cpp | 6 +- 17 files changed, 568 insertions(+), 182 deletions(-) diff --git a/Makefile b/Makefile index 6b941d622..93c23772c 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +tt: + clang++ -std=c++17 ggml.cpp + # Define the default target now so that it is always the first target BUILD_TARGETS = \ main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \ diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 697e003e3..b2679a9d9 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -1527,11 +1527,14 @@ int main(int argc, char ** argv) { std::vector work_buffer; for (int ex=0; exfn_model_base.c_str(), params_gguf); // create new gguf diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index cda68b76d..3b5fefda8 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -294,10 +294,12 @@ static void init_model(struct llama_model * input, struct my_llama_model * model // get parameters directly from gguf file { - struct gguf_init_params params = { - .no_alloc = false, - .ctx = NULL, - }; + struct gguf_init_params params( + //.no_alloc = + false, + //.ctx = + NULL + ); struct gguf_context * mctx = gguf_init_from_file(fn_model, params); load_model_hparams_gguf(mctx, &hparams, "llama"); @@ -991,9 +993,11 @@ static void save_checkpoint_lora_gguf(struct gguf_context * fctx, struct my_llam static bool load_checkpoint_lora_file(const char * filename, struct my_llama_model * model, struct my_llama_lora * lora, struct train_state * train) { struct ggml_context * f_ggml_ctx; - struct gguf_init_params params; - params.no_alloc = false; - params.ctx = &f_ggml_ctx; + struct gguf_init_params params( + //params.no_alloc = + false, + //params.ctx = + &f_ggml_ctx); struct gguf_context * fctx = gguf_init_from_file(filename, params); if (fctx == NULL) { return false; @@ -1708,11 +1712,14 @@ int main(int argc, char ** argv) { std::vector mem_compute_data; // context for input tensors without their data - struct ggml_init_params ctx_input_params = { - .mem_size= ggml_tensor_overhead() * 2, // mem_size - .mem_buffer=NULL, // mem_buffer - .no_alloc=true, // no_alloc - }; + struct ggml_init_params ctx_input_params( + //.mem_size= + ggml_tensor_overhead() * 2, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc= + true // no_alloc + ); struct ggml_context * ctx_input = ggml_init(ctx_input_params); // the input tensors @@ -1737,11 +1744,14 @@ int main(int argc, char ** argv) { 2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() + (params.common.use_checkpointing ? 
3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true)) ); - struct ggml_init_params ctx_compute_params = { - .mem_size=estimated_compute_size_wo_data, // mem_size - .mem_buffer=NULL, // mem_buffer - .no_alloc=true, // no_alloc - }; + struct ggml_init_params ctx_compute_params( + //.mem_size= + estimated_compute_size_wo_data, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc= + true // no_alloc + ); struct ggml_context * ctx_compute = NULL; struct ggml_tensor * loss = NULL; @@ -1904,11 +1914,14 @@ int main(int argc, char ** argv) { printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f)); // context for work buffer - struct ggml_init_params ctx_work_params = { - .mem_size= max_work_size, // mem_size - .mem_buffer = NULL, // mem_buffer - .no_alloc = false, // no_alloc - }; + struct ggml_init_params ctx_work_params( + //.mem_size= + max_work_size, // mem_size + //.mem_buffer = + NULL, // mem_buffer + //.no_alloc = + false // no_alloc + ); struct ggml_context * ctx_work = ggml_init(ctx_work_params); int64_t t0 = ggml_time_ms(); diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index bfd0d6f98..684724ef9 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -255,11 +255,14 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima const auto & buf_compute = ctx->buf_compute; - struct ggml_init_params params = { - .mem_size = buf_compute.size, - .mem_buffer = buf_compute.data, - .no_alloc = false, - }; + struct ggml_init_params params( + //.mem_size = + buf_compute.size, + //.mem_buffer = + buf_compute.data, + //.no_alloc = + false + ); params.no_alloc = true; @@ -455,10 +458,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { struct ggml_context * meta = NULL; - struct gguf_init_params params = { - .no_alloc = true, - .ctx = &meta, - }; + struct gguf_init_params params( + //.no_alloc = + true, + //.ctx = + &meta); + struct gguf_context * ctx = gguf_init_from_file(fname, params); if (!ctx) { @@ -552,11 +557,14 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { // load tensors { - struct ggml_init_params params = { - .mem_size = ctx_size, - .mem_buffer = NULL, - .no_alloc = false, - }; + struct ggml_init_params params( + //.mem_size = + ctx_size, + //.mem_buffer = + NULL, + //.no_alloc = + false + ); new_clip->ctx = ggml_init(params); if (!new_clip->ctx) { diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index f7f3792f2..04e7546f7 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -600,10 +600,12 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo // set vocab by copying from vocab_model gguf file { - struct gguf_init_params params = { - .no_alloc = false, - .ctx = NULL, - }; + struct gguf_init_params params( + //.no_alloc = + false, + //.ctx = + NULL + ); struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params); const int token_idx = gguf_find_key(vctx, kv(LLM_KV_TOKENIZER_LIST)); @@ -745,9 +747,11 @@ static void save_checkpoint_gguf(struct gguf_context * fctx, const char * fn_voc static bool load_checkpoint_file(const char * filename, struct my_llama_model * model, struct train_state * train) { struct ggml_context * f_ggml_ctx; - struct 
gguf_init_params params; - params.no_alloc = false; - params.ctx = &f_ggml_ctx; + struct gguf_init_params params( + //params.no_alloc = + false, + //params.ctx = + &f_ggml_ctx); struct gguf_context * fctx = gguf_init_from_file(filename, params); if (fctx == NULL) { return false; @@ -1085,11 +1089,14 @@ int main(int argc, char ** argv) { ggml_allocr * alloc = NULL; // context for input tensors without their data - struct ggml_init_params ctx_input_params = { - .mem_size = ggml_tensor_overhead() * 2, // mem_size - .mem_buffer = NULL, // mem_buffer - .no_alloc = true, // no_alloc - }; + struct ggml_init_params ctx_input_params ( + //.mem_size = + ggml_tensor_overhead() * 2, // mem_size + // .mem_buffer = + NULL, // mem_buffer + // .no_alloc = + true // no_alloc + ); struct ggml_context * ctx_input = ggml_init(ctx_input_params); // the input tensors @@ -1114,11 +1121,14 @@ int main(int argc, char ** argv) { 2*LLAMA_TRAIN_MAX_NODES*ggml_tensor_overhead() + (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true)) ); - struct ggml_init_params ctx_compute_params = { - .mem_size = estimated_compute_size_wo_data, // mem_size - .mem_buffer= NULL, // mem_buffer - .no_alloc = true, // no_alloc - }; + struct ggml_init_params ctx_compute_params( + // .mem_size = + estimated_compute_size_wo_data, // mem_size + //.mem_buffer= + NULL, // mem_buffer + //.no_alloc = + true // no_alloc + ); struct ggml_context * ctx_compute = NULL; struct ggml_tensor * loss = NULL; @@ -1267,11 +1277,14 @@ int main(int argc, char ** argv) { printf("%s: work_size = %zu bytes (%.1f MB)\n", __func__, max_work_size, (float) max_work_size / (1024.0f*1024.0f)); // context for work buffer - struct ggml_init_params ctx_work_params = { - .mem_size= max_work_size, // - .mem_buffer= NULL, // - .no_alloc=false, // - }; + struct ggml_init_params ctx_work_params( + //.mem_size= + max_work_size, // + //.mem_buffer= + NULL, // + //.no_alloc= + false // + ); struct ggml_context * ctx_work = ggml_init(ctx_work_params); int64_t t0 = ggml_time_ms(); diff --git a/ggml-backend.cpp b/ggml-backend.cpp index 2a43b1277..f258f69e3 100644 --- a/ggml-backend.cpp +++ b/ggml-backend.cpp @@ -586,11 +586,14 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g memset(sched->node_copies, 0, sizeof(sched->node_copies[0]) * hash_size); sched->n_splits = 0; - struct ggml_init_params params = { - .mem_size = sizeof(sched->context_buffer), - .mem_buffer = sched->context_buffer, - .no_alloc = true - }; + struct ggml_init_params params( + //.mem_size = + sizeof(sched->context_buffer), + //.mem_buffer = + sched->context_buffer, + //.no_alloc = + true + ); if (sched->ctx != NULL) { ggml_free(sched->ctx); diff --git a/ggml.cpp b/ggml.cpp index c0ea3704e..27f078ca1 100644 --- a/ggml.cpp +++ b/ggml.cpp @@ -16446,7 +16446,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { /*.abort_callback =*/ NULL, /*.abort_callback_data =*/ NULL, }; - struct ggml_compute_state * workers = alloca(sizeof(struct ggml_compute_state)*n_threads); + struct ggml_compute_state * workers = (struct ggml_compute_state *)alloca(sizeof(struct ggml_compute_state)*n_threads); // create thread pool if (n_threads > 1) { @@ -16775,11 +16775,11 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context * const size_t overhead = 1*ggml_tensor_overhead(); GGML_ASSERT(0); // FIXME - struct ggml_init_params params;// = { - params.mem_size = fsize + overhead, - 
params.mem_buffer = NULL, - params.no_alloc = false, - // }; + struct ggml_init_params params( + fsize + overhead, + NULL, + false); + *ctx_data = ggml_init(params); @@ -16831,10 +16831,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context * { const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false); - struct ggml_init_params params;// = { - params.mem_size = size_eval + overhead, - params.mem_buffer = NULL, - params.no_alloc = true, + struct ggml_init_params params( + size_eval + overhead, +NULL, + true); *ctx_eval = ggml_init(params); @@ -17974,7 +17974,7 @@ GGML_API void ggml_opt_init( opt->nx = nx; opt->just_initialized = true; if (opt->ctx == NULL) { - struct ggml_init_params ctx_opt_params; + struct ggml_init_params ctx_opt_params; if (opt->params.type == GGML_OPT_ADAM) { ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3; if (opt->params.past > 0) { @@ -18690,10 +18690,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size; // FIXME - struct ggml_init_params pdata; - pdata.mem_size = mem_size, - pdata.mem_buffer = NULL, - pdata.no_alloc = params.no_alloc, + struct ggml_init_params pdata( + mem_size, + NULL, + params.no_alloc); *params.ctx = ggml_init(pdata); diff --git a/ggml.h b/ggml.h index 9f5f20da9..66a6b65bc 100644 --- a/ggml.h +++ b/ggml.h @@ -286,7 +286,7 @@ GGML_UNUSED(prefix##3); #ifdef __cplusplus -extern "C" { +//extern "C" { #endif #if defined(__ARM_NEON) && defined(__CUDACC__) @@ -581,6 +581,18 @@ extern "C" { }; struct ggml_init_params : refl::attr::usage::type{ + + ggml_init_params(size_t mem_size, + void * mem_buffer, + bool no_alloc): + mem_size( mem_size), + mem_buffer(mem_buffer), + no_alloc(no_alloc){} + ggml_init_params(): + mem_size(0), + mem_buffer(0), + no_alloc(0){} + // memory pool size_t mem_size; // bytes void * mem_buffer; // if NULL, memory will be allocated internally @@ -2013,6 +2025,8 @@ extern "C" { struct gguf_context; struct gguf_init_params : refl::attr::usage::type{ + gguf_init_params(bool no_alloc, struct ggml_context ** ctx): no_alloc(no_alloc),ctx(ctx){} + bool no_alloc; // if not NULL, create a ggml_context and allocate the tensor data in it @@ -2164,5 +2178,5 @@ extern "C" { GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type); #ifdef __cplusplus -} +//} #endif diff --git a/llama.cpp b/llama.cpp index 9bf27d69e..2d0d2b30f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1785,10 +1785,10 @@ struct llama_model_loader { struct ggml_context * ctx_meta = NULL; llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") { - struct gguf_init_params params = { - .no_alloc = true, - .ctx = &ctx_meta, - }; + struct gguf_init_params params( + /*.no_alloc =*/ true, + /*.ctx = */ &ctx_meta + ); ctx_gguf = gguf_init_from_file(fname.c_str(), params); if (!ctx_gguf) { @@ -2676,11 +2676,12 @@ static void llm_load_tensors( model.mlock_buf.grow_to(model.buf.size); } - struct ggml_init_params params = { - .mem_size = model.buf.size, - .mem_buffer = model.buf.data, - .no_alloc = ml.use_mmap, - }; + struct ggml_init_params params( + model.buf.size, + model.buf.data, + + + ml.use_mmap ); model.ctx = ggml_init(params); if (!model.ctx) { @@ -3842,11 +3843,14 @@ struct llm_build_context { } void init() { - struct ggml_init_params params = { - .mem_size = buf_compute.size, - 
.mem_buffer = buf_compute.data, - .no_alloc = true, - }; + struct ggml_init_params params( + //.mem_size = + buf_compute.size, + //.mem_buffer = + buf_compute.data, + //.no_alloc = + true + ); ctx0 = ggml_init(params); } @@ -8447,10 +8451,11 @@ void llama_backend_init(bool numa) { // needed to initialize f16 tables { - struct ggml_init_params params = { .mem_size = 0, - .mem_buffer = NULL, - .no_alloc = false - }; + struct ggml_init_params params( + 0, + NULL, + false + ); struct ggml_context * ctx = ggml_init(params); ggml_free(ctx); } @@ -9021,11 +9026,14 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat if (kv_buf_size) { const size_t elt_size = ggml_element_size(kv_self.k); - ggml_init_params ip = { - .mem_size = 6*ggml_tensor_overhead() + ggml_graph_overhead(), - .mem_buffer =NULL, - .no_alloc = /* no_alloc */ true - }; + ggml_init_params ip( + //.mem_size = + 6*ggml_tensor_overhead() + ggml_graph_overhead(), + //.mem_buffer = + NULL, + //.no_alloc = /* no_alloc */ + true + ); ggml_context * cpy_ctx = ggml_init( ip); ggml_cgraph * gf = ggml_new_graph(cpy_ctx); @@ -9155,10 +9163,13 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const size_t elt_size = ggml_element_size(kv_self.k); - ggml_init_params ip { - .mem_size= 6*ggml_tensor_overhead() + ggml_graph_overhead(), - .mem_buffer=NULL, - .no_alloc=true }; + ggml_init_params ip( + //.mem_size= + 6*ggml_tensor_overhead() + ggml_graph_overhead(), + //.mem_buffer= + NULL, + //.no_alloc= + true ); ggml_context * cpy_ctx = ggml_init(ip); ggml_cgraph * gf = ggml_new_graph(cpy_ctx); diff --git a/llama.h b/llama.h index b016b48f8..cbde7990b 100644 --- a/llama.h +++ b/llama.h @@ -50,7 +50,7 @@ #endif #ifdef __cplusplus -extern "C" { +//extern "C" { #endif // @@ -189,7 +189,7 @@ extern "C" { llama_seq_id all_seq_id; // used if seq_id == NULL } llama_batch; - struct llama_model_params : refl::attr::usage::type{ + struct llama_model_params { int32_t n_gpu_layers; // number of layers to store in VRAM int32_t main_gpu; // the GPU that is used for scratch and small tensors const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES) @@ -268,7 +268,7 @@ extern "C" { LLAMA_GRETYPE_CHAR_ALT = 6, }; - typedef struct llama_grammar_element : refl::attr::usage::type{ + typedef struct llama_grammar_element : refl::attr::usage::type { llama_grammar_element( enum llama_gretype type, uint32_t value // Unicode code point or rule ID ):type(type), value(value){} @@ -811,7 +811,7 @@ extern "C" { LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx); #ifdef __cplusplus -} +//} #endif // Internal API to be implemented by llama.cpp and used by tests/benchmarks only @@ -828,6 +828,6 @@ const std::vector> & llama_internal #endif // LLAMA_API_INTERNAL -template void print_fields(const T& obj); + #endif // LLAMA_H diff --git a/print.hpp b/print.hpp index d0679eca1..9c17550c0 100644 --- a/print.hpp +++ b/print.hpp @@ -4,65 +4,373 @@ //#include #include "llama.h" -REFL_TYPE(ggml_object) -REFL_END -REFL_TYPE(ggml_tensor) -REFL_END -REFL_TYPE(ggml_cplan ) -REFL_END -REFL_TYPE(ggml_hash_set ) -REFL_END -REFL_TYPE(ggml_cgraph ) -REFL_END -REFL_TYPE(ggml_scratch ) -REFL_END REFL_TYPE(ggml_init_params ) REFL_END -REFL_TYPE(ggml_compute_params ) -REFL_END -REFL_TYPE(ggml_opt_params ) -REFL_END + REFL_TYPE(ggml_opt_params::ggml_adam) REFL_END + REFL_TYPE(ggml_opt_params::ggml_lbfgs) REFL_END -REFL_TYPE(ggml_opt_context ) -REFL_END + + 
REFL_TYPE(ggml_opt_context::ggml_grad ) REFL_END + REFL_TYPE(gpt_params ) REFL_END -REFL_TYPE(gguf_init_params ) -REFL_END -REFL_TYPE(ggml_something ) -REFL_END + + REFL_TYPE(llama_sampling_context ) REFL_END + REFL_TYPE(llama_token_data ) REFL_END -REFL_TYPE(llama_model ) -REFL_END + + REFL_TYPE(llama_token_data_array ) REFL_END + REFL_TYPE(llama_batch ) REFL_END -REFL_TYPE(llama_model_params ) -REFL_END -REFL_TYPE(llama_context_params ) -REFL_END -REFL_TYPE(llama_context ) -REFL_END -REFL_TYPE(llama_model_quantize_params ) -REFL_END -REFL_TYPE(llama_grammar_element ) -REFL_END -REFL_TYPE(llama_timings ) -REFL_END -REFL_TYPE(llama_beam_view ) -REFL_END -REFL_TYPE(llama_beams_state ) + + +REFL_TYPE(ggml_object) + REFL_FIELD(offs) REFL_END +REFL_TYPE(ggml_tensor) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_cplan) + REFL_FIELD(work_size) +REFL_END + +REFL_TYPE(ggml_hash_set) + REFL_FIELD(size) +REFL_END + +REFL_TYPE(ggml_cgraph) + REFL_FIELD(size) +REFL_END + +REFL_TYPE(ggml_scratch) + REFL_FIELD(offs) +REFL_END + +REFL_TYPE(ggml_compute_params) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_opt_params) + REFL_FIELD(type) +REFL_END + +REFL_TYPE(ggml_opt_context) + REFL_FIELD(ctx) +REFL_END + +//REFL_TYPE(gguf_context) +//REFL_END + +REFL_TYPE(gguf_init_params) +REFL_END + +REFL_TYPE(ggml_something) + REFL_FIELD(type_name) +REFL_END + + +//REFL_TYPE() +// REFL_FIELD(d) +//REFL_TYPE() + +// incomplete ttype +// REFL_TYPE(ggml_context) +// REFL_FIELD(mem_size) +// REFL_FIELD(mem_buffer) +// REFL_END + +//REFL_TYPE(ggml_context_container) +// REFL_FIELD(used) +// REFL_FIELD(context) +//REFL_END + +// REFL_TYPE(ggml_numa_node) +// REFL_FIELD(cpus) +// REFL_FIELD(n_cpus) +// REFL_END + +// REFL_TYPE(ggml_numa_nodes) +// REFL_FIELD(nodes) +// REFL_FIELD(n_nodes) +// REFL_END + +// REFL_TYPE(ggml_state) +// REFL_FIELD(contexts) +// REFL_FIELD(numa) +// REFL_END + +// REFL_TYPE(gguf_str) +// REFL_FIELD(n) +// REFL_FIELD(data) +// REFL_END + +// REFL_TYPE(ggml_map_custom1_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(ggml_map_custom2_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(ggml_map_custom3_op_params) +// REFL_FIELD(fun) +// REFL_FIELD(n_tasks) +// REFL_END + +// REFL_TYPE(hash_map) +// REFL_FIELD(set) +// REFL_FIELD(vals) +// REFL_END +// REFL_TYPE(ggml_compute_state_shared) +// REFL_FIELD(cgraph) +// REFL_FIELD(cplan) +// REFL_END +// REFL_TYPE(ggml_compute_state) +// REFL_FIELD(thrd) +// REFL_FIELD(ith) +// REFL_END +// REFL_TYPE(ggml_lbfgs_iteration_data) +// REFL_FIELD(alpha) +// REFL_FIELD(ys) +// REFL_END +//REFL_TYPE() +// REFL_FIELD(type) +//REFL_END +// REFL_TYPE(gguf_kv) +// REFL_FIELD(key) +// REFL_FIELD(type) +// REFL_END + +// REFL_TYPE(gguf_header) +// REFL_FIELD(magic) +// REFL_FIELD(version) +// REFL_END + +// REFL_TYPE(gguf_tensor_info) +// REFL_FIELD(name) +// REFL_FIELD(n_dims) +// REFL_END + +REFL_TYPE(gguf_context) +// REFL_FIELD(header) +// REFL_FIELD(kv) +REFL_END + +// REFL_TYPE(gguf_buf) +// REFL_FIELD(data) +// REFL_FIELD(size) +// REFL_END + +//REFL_TYPE(llama_token_data) +//REFL_END + + +REFL_TYPE(llama_model_params) + REFL_FIELD(n_gpu_layers) +REFL_END +REFL_TYPE(llama_context_params) + REFL_FIELD(seed) +REFL_END +REFL_TYPE(llama_model_quantize_params) + REFL_FIELD(nthread) +REFL_END + +REFL_TYPE(llama_grammar_element) +REFL_END + +REFL_TYPE(llama_timings) + REFL_FIELD(t_start_ms) +REFL_END +REFL_TYPE(llama_beam_view) + REFL_FIELD(tokens) +REFL_END + +REFL_TYPE(llama_beams_state) + 
REFL_FIELD(beam_views) +REFL_END + +//REFL_TYPE(ggml_backend) +//REFL_END + +REFL_TYPE(ggml_backend_buffer) +REFL_END + +//REFL_TYPE(ggml_allocr) +//REFL_END + +//REFL_TYPE(ggml_tallocr) +//REFL_END + +//REFL_TYPE(ggml_gallocr) +//REFL_END + + +//REFL_TYPE(llama_buffer) +//REFL_FIELD(data) +//REFL_FIELD(size) +//REFL_END + + +// REFL_TYPE(llama_file) +// REFL_FIELD(fp) +// REFL_FIELD(size) +// REFL_END + + +// REFL_TYPE(llama_mmap) +// REFL_FIELD(addr) +// REFL_FIELD(size) +// REFL_END + + +// REFL_TYPE(llama_mlock) +// REFL_FIELD(addr) +// REFL_FIELD(size) +// REFL_END + +//REFL_TYPE(llama_state) +// REFL_FIELD(log_callback) +// REFL_FIELD(log_callback_user_data) +// REFL_END + + +// REFL_TYPE(llama_hparams) +// REFL_FIELD(vocab_only) +// REFL_FIELD(n_vocab) +// REFL_END + + +//REFL_TYPE(llama_cparams) +// REFL_FIELD(n_ctx) +// REFL_FIELD(n_batch) +//REFL_END + +//REFL_TYPE(llama_layer) +// REFL_FIELD(attn_norm) +// REFL_FIELD(attn_norm_b) +//REFL_END + +// REFL_TYPE(llama_kv_cell) +// REFL_FIELD(pos) +// REFL_FIELD(delta) +// REFL_END + +// REFL_TYPE(llama_kv_cache) +// REFL_FIELD(has_shift) +// REFL_FIELD(head) +// REFL_END + +// REFL_TYPE(llama_vocab) +// REFL_END + +REFL_TYPE(llama_model) +// REFL_FIELD(type) +// REFL_FIELD(arch) +REFL_END + +REFL_TYPE(llama_context) +REFL_END + +// REFL_TYPE(llama_model_loader) +// REFL_FIELD(n_kv) +// REFL_FIELD(n_tensors) +// REFL_END + +// REFL_TYPE(llm_build_context) +// REFL_FIELD(model) +// REFL_FIELD(hparams) +// REFL_END + +// REFL_TYPE(llm_offload_trie) +// REFL_END + +// REFL_TYPE(llm_symbol) +// REFL_FIELD(prev) +// REFL_END + +// REFL_TYPE(llm_bigram_spm) +// REFL_END + +// REFL_TYPE(llm_tokenizer_spm) +// REFL_END + +// REFL_TYPE(llm_bigram_bpe) +// REFL_END + +// REFL_TYPE(llm_tokenizer_bpe) +// REFL_END + + +// REFL_TYPE(fragment_buffer_variant) +// REFL_END + + +// REFL_TYPE(llama_partial_utf8) +// REFL_FIELD(value) +// REFL_FIELD(n_remain) +// REFL_END + + +REFL_TYPE(llama_grammar) +// REFL_FIELD(rules) +// REFL_FIELD(stacks) +REFL_END + + +//REFL_TYPE(llama_grammar_candidate) +// REFL_FIELD(index) +// REFL_FIELD(code_points) +//REFL_END + + +// REFL_TYPE(llama_beam) +// REFL_FIELD(tokens) +// REFL_FIELD(p) +// REFL_END + + +// REFL_TYPE(llama_logit_info) +// REFL_FIELD(logits) +// REFL_FIELD(n_vocab) +// REFL_END + +// REFL_TYPE(llama_beam_search_data) +// REFL_FIELD(ctx) +// REFL_FIELD(n_beams) +// REFL_END + + +// REFL_TYPE(quantize_state_internal) +// REFL_FIELD(model) +// REFL_FIELD(params) +// REFL_END + +// REFL_TYPE(llama_data_context) +// REFL_END + +// REFL_TYPE(llama_data_buffer_context) +// REFL_FIELD(ptr) +// REFL_END + +// REFL_TYPE(llama_data_file_context) +// REFL_FIELD(file) +// REFL_END + // // A simple struct with some fields and a function // // A custom attribute to mark some fields as hidden struct hidden : refl::attr::usage::field {}; diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index a2459a286..a58e55562 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -115,11 +115,11 @@ int main(int argc, char * argv[]) { generate_data(1.0, test_data2.size(), test_data2.data()); // Initialize GGML, ensures float conversion tables are initialized - struct ggml_init_params ggml_params = { + struct ggml_init_params ggml_params( /* .mem_size = */ 1*1024, /* .mem_buffer = */ NULL, - /* .no_alloc = */ true, - }; + /* .no_alloc = */ true + ); struct ggml_context * ctx = ggml_init(ggml_params); int num_failed = 0; diff --git a/tests/test-quantize-perf.cpp 
b/tests/test-quantize-perf.cpp
index 88fac0e23..dccfe087b 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -261,11 +261,11 @@ int main(int argc, char * argv[]) {
 
 
     // Initialize GGML, ensures float conversion tables are initialized
-    struct ggml_init_params ggml_params = {
+    struct ggml_init_params ggml_params(
         /* .mem_size   = */ 1*1024,
         /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ true,
-    };
+        /* .no_alloc   = */ true
+    );
     struct ggml_context * ctx = ggml_init(ggml_params);
 
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp
index 26c1f42dc..e1d92cdd4 100644
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@@ -124,11 +124,11 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 }
 
 int main(int /*argc*/, const char ** /*argv*/) {
-    struct ggml_init_params params = {
+    struct ggml_init_params params(
         /* .mem_size   = */ 128*1024*1024,
         /* .mem_buffer = */ NULL,
-        /* .no_alloc   = */ false,
-    };
+        /* .no_alloc   = */ false
+    );
 
     std::vector<uint8_t> work_buffer;
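
Note on the pattern applied throughout this patch: ggml.c is now compiled as C++ (the Makefile's `tt` target runs `clang++ -std=c++17 ggml.cpp`), and C99 designated-initializer aggregates such as `{ .mem_size = ..., .no_alloc = ... }` are not valid C++17, so `ggml_init_params` and `gguf_init_params` gain constructors in ggml.h and every call site passes positional arguments instead; print.hpp then registers the structs with refl-cpp so their fields can be enumerated. The following is a minimal, self-contained sketch of that pattern, assuming refl-cpp is available as <refl.hpp> (print.hpp already depends on it); the names ggml_init_params_sketch, print_all and the main() driver are illustrative stand-ins, not code from the patch.

// Sketch of the constructor + refl-cpp registration pattern used by this patch.
#include <cstddef>
#include <iostream>
#include <refl.hpp>

// Constructor mirrors the one added to ggml_init_params in ggml.h, so call
// sites can pass positional arguments instead of C99 designated initializers.
struct ggml_init_params_sketch {
    ggml_init_params_sketch(size_t mem_size, void * mem_buffer, bool no_alloc)
        : mem_size(mem_size), mem_buffer(mem_buffer), no_alloc(no_alloc) {}

    size_t mem_size;    // bytes
    void * mem_buffer;  // if NULL, memory is allocated internally
    bool   no_alloc;    // don't allocate the tensor data
};

// Reflection registration in the style of print.hpp.
REFL_TYPE(ggml_init_params_sketch)
    REFL_FIELD(mem_size)
    REFL_FIELD(mem_buffer)
    REFL_FIELD(no_alloc)
REFL_END

// Illustrative field printer (the patch removes a similar print_fields<T>()
// declaration from llama.h): walk the registered fields and print name = value.
template <typename T>
void print_all(const T & obj) {
    refl::util::for_each(refl::reflect<T>().members, [&](auto member) {
        std::cout << get_display_name(member) << " = " << member(obj) << "\n";
    });
}

int main() {
    // Same calling convention as the converted call sites, with the old field
    // names kept as comments, e.g. as done in the tests above:
    ggml_init_params_sketch params(
        /* .mem_size   = */ 16u * 1024u,
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ true
    );
    print_all(params);
    return 0;
}

If the build were moved to C++20, in-order designated initializers would compile again and the added constructors would be optional; they are only required while the target stays at -std=c++17.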