From 6fd690fae7f38024492caebb0233b294482e39a7 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Wed, 22 Nov 2023 09:04:00 -0500
Subject: [PATCH] running

---
 examples/baby-llama/baby-llama.cpp       | 12 +++----
 examples/benchmark/benchmark-matmult.cpp |  6 ++--
 .../convert-llama2c-to-ggml.cpp          |  4 +--
 examples/finetune/finetune.cpp           | 22 ++++++------
 examples/gguf/gguf.cpp                   | 14 ++++----
 examples/llava/clip.cpp                  | 16 ++++-----
 .../train-text-from-scratch.cpp          | 22 ++++++------
 ggml-alloc.cpp                           | 20 ++++++-----
 ggml-backend.cpp                         |  6 ++--
 ggml.cpp                                 | 22 ++++++------
 ggml.h                                   | 33 ++++++++---------
 llama.cpp                                | 36 +++++++++++++------
 12 files changed, 116 insertions(+), 97 deletions(-)

diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 8155101d0..697e003e3 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
     for (int ex=0; ex{ "hello", "world", "!" }.data(), 3);
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ false,
+        .mem_size   = 128ull*1024ull*1024ull,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
 
     struct ggml_context * ctx_data = ggml_init(params);
@@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        .no_alloc = false,
+        .ctx      = NULL,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index fc0656c23..bfd0d6f98 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
     const auto & buf_compute = ctx->buf_compute;
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_compute.size,
-        /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        .mem_size   = buf_compute.size,
+        .mem_buffer = buf_compute.data,
+        .no_alloc   = false,
     };
 
     params.no_alloc = true;
@@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        .no_alloc = true,
+        .ctx      = &meta,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname, params);
@@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     // load tensors
     {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ ctx_size,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
+            .mem_size   = ctx_size,
+            .mem_buffer = NULL,
+            .no_alloc   = false,
         };
 
         new_clip->ctx = ggml_init(params);
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index f605f25a1..f7f3792f2 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
     // set vocab by copying from vocab_model gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
 
         struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
@@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {
 
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
 
@@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
 
@@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {
 
     // context for work buffer
    struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size, // mem_size
+        .mem_buffer = NULL,          // mem_buffer
+        .no_alloc   = false,         // no_alloc
    };
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);
 
diff --git a/ggml-alloc.cpp b/ggml-alloc.cpp
index cdfe4caf6..4c0c914d7 100644
--- a/ggml-alloc.cpp
+++ b/ggml-alloc.cpp
@@ -351,15 +351,17 @@ struct ggml_gallocr {
 ggml_gallocr_t ggml_gallocr_new(void) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));
 
+    ggml_hash_set hs = { .size = 0, .keys = NULL };
     *galloc = (struct ggml_gallocr) {
-        /*.talloc           = */ NULL,
-        /*.hash_set         = */ {0},
-        /*.hash_values      = */ NULL,
-        /*.hash_values_size = */ 0,
-        /*.hash_allocs      = */ NULL,
-        /*.parse_seq        = */ NULL,
-        /*.parse_seq_len    = */ 0,
+        .talloc           = NULL,
+        .hash_set         = hs,
+        .hash_values      = NULL,
+        .hash_values_size = 0,
+        .hash_allocs      = NULL,
+        .parse_seq        = NULL,
+        .parse_seq_len    = 0,
     };
+    //((*galloc).hash_set)[0] = 0;
 
     return galloc;
 }
@@ -706,8 +708,8 @@ struct ggml_allocr {
 static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
     ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
     *alloc = (struct ggml_allocr) {
-        /*.talloc = */ talloc,
-        /*.galloc = */ ggml_gallocr_new(),
+        .talloc = talloc,
+        .galloc = ggml_gallocr_new(),
     };
     return alloc;
 }
diff --git a/ggml-backend.cpp b/ggml-backend.cpp
index 51f7018d1..2a43b1277 100644
--- a/ggml-backend.cpp
+++ b/ggml-backend.cpp
@@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
     sched->n_splits = 0;
 
     struct ggml_init_params params = {
-        /*.mem_size =   */ sizeof(sched->context_buffer),
-        /*.mem_buffer = */ sched->context_buffer,
-        /*.no_alloc =   */ true
+        .mem_size   = sizeof(sched->context_buffer),
+        .mem_buffer = sched->context_buffer,
+        .no_alloc   = true
     };
 
     if (sched->ctx != NULL) {
diff --git a/ggml.cpp b/ggml.cpp
index 963b44e3f..c0ea3704e 100644
--- a/ggml.cpp
+++ b/ggml.cpp
@@ -2,6 +2,8 @@
 //https://github.com/Neargye/magic_enum.git
 #include 
+
+
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
 
@@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
             struct ggml_compute_params params = {
-                /*.type  =*/ GGML_TASK_FINALIZE,
-                /*.ith   =*/ 0,
-                /*.nth   =*/ 0,
-                /*.wsize =*/ cplan->work_size,
-                /*.wdata =*/ cplan->work_data,
+                .type  = GGML_TASK_FINALIZE,
+                .ith   = 0,
+                .nth   = 0,
+                .wsize = cplan->work_size,
+                .wdata = cplan->work_data,
             };
 
             if (node_n != -1) {
@@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_COMPUTE,
-            /*.ith   =*/ state->ith,
-            /*.nth   =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_COMPUTE,
+            .ith   = state->ith,
+            .nth   = n_tasks,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
 
         if (state->ith < n_tasks) {
diff --git a/ggml.h b/ggml.h
index 6d625fd67..9f5f20da9 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include 
 //
 // GGML Tensor Library
 //
@@ -465,7 +466,7 @@ extern "C" {
     };
 
     // ggml object
-    struct ggml_object {
+    struct ggml_object : refl::attr::usage::type {
         size_t offs;
         size_t size;
 
@@ -479,7 +480,7 @@ extern "C" {
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
 
     // n-dimensional tensor
-    struct ggml_tensor {
+    struct ggml_tensor : refl::attr::usage::type {
         enum ggml_type type;
         enum ggml_backend_type backend;
 
@@ -524,7 +525,7 @@ extern "C" {
 
     // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
+    struct ggml_cplan : refl::attr::usage::type {
         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
 
@@ -541,13 +542,13 @@ extern "C" {
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
 
-    struct ggml_hash_set {
+    struct ggml_hash_set : refl::attr::usage::type {
         size_t size;
         struct ggml_tensor ** keys;
     };
 
     // computation graph
-    struct ggml_cgraph {
+    struct ggml_cgraph : refl::attr::usage::type {
         int size;
         int n_nodes;
         int n_leafs;
@@ -567,7 +568,7 @@ extern "C" {
     };
 
     // scratch buffer
-    struct ggml_scratch {
+    struct ggml_scratch : refl::attr::usage::type {
         size_t offs;
         size_t size;
         void * data;
@@ -579,7 +580,7 @@ extern "C" {
         {}
     };
 
-    struct ggml_init_params {
+    struct ggml_init_params : refl::attr::usage::type {
         // memory pool
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
@@ -597,7 +598,7 @@ extern "C" {
         GGML_TASK_FINALIZE,
     };
 
-    struct ggml_compute_params {
+    struct ggml_compute_params : refl::attr::usage::type {
         enum ggml_task_type type;
 
         // ith = thread index, nth = number of threads
@@ -1835,7 +1836,7 @@ extern "C" {
     //
     // see ggml.c (ggml_opt_default_params) for default values
     //
-    struct ggml_opt_params {
+    struct ggml_opt_params : refl::attr::usage::type {
         enum ggml_opt_type type;
 
         size_t graph_size;
@@ -1865,7 +1866,7 @@ extern "C" {
         int n_gradient_accumulation;
 
         // ADAM parameters
-        struct {
+        struct ggml_adam : refl::attr::usage::type {
             int n_iter;
 
             float sched; // schedule multiplier (fixed, decay or warmup)
@@ -1881,7 +1882,7 @@ extern "C" {
         } adam;
 
         // LBFGS parameters
-        struct {
+        struct ggml_lbfgs : refl::attr::usage::type {
             int m; // number of corrections to approximate the inv. Hessian
             int n_iter;
             int max_linesearch;
@@ -1896,7 +1897,7 @@ extern "C" {
         } lbfgs;
     };
 
-    struct ggml_opt_context {
+    struct ggml_opt_context : refl::attr::usage::type {
         struct ggml_context * ctx;
         struct ggml_opt_params params;
 
@@ -1908,7 +1909,7 @@ extern "C" {
         float loss_before;
         float loss_after;
 
-        struct {
+        struct ggml_grad : refl::attr::usage::type {
             struct ggml_tensor * g;  // current gradient
             struct ggml_tensor * m;  // first moment
             struct ggml_tensor * v;  // second moment
@@ -1918,7 +1919,7 @@ extern "C" {
             int n_no_improvement;
         } adam;
 
-        struct {
+        struct ggml_params : refl::attr::usage::type {
             struct ggml_tensor * x;  // current parameters
             struct ggml_tensor * xp; // previous parameters
             struct ggml_tensor * g;  // current gradient
@@ -2011,7 +2012,7 @@ extern "C" {
 
     struct gguf_context;
 
-    struct gguf_init_params {
+    struct gguf_init_params : refl::attr::usage::type {
         bool no_alloc;
 
         // if not NULL, create a ggml_context and allocate the tensor data in it
@@ -2148,7 +2149,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)  (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
 
-    typedef struct {
+    typedef struct ggml_something : refl::attr::usage::type {
         const char * type_name;
         int blck_size;
         size_t type_size;
diff --git a/llama.cpp b/llama.cpp
index c51829c45..6aaaef6c1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1786,8 +1786,8 @@ struct llama_model_loader {
 
     llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
         struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ &ctx_meta,
+            .no_alloc = true,
+            .ctx      = &ctx_meta,
         };
 
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
@@ -2677,9 +2677,9 @@ static void llm_load_tensors(
     }
 
     struct ggml_init_params params = {
-        /*.mem_size   =*/ model.buf.size,
-        /*.mem_buffer =*/ model.buf.data,
-        /*.no_alloc   =*/ ml.use_mmap,
+        .mem_size   = model.buf.size,
+        .mem_buffer = model.buf.data,
+        .no_alloc   = ml.use_mmap,
     };
 
     model.ctx = ggml_init(params);
@@ -3843,9 +3843,9 @@ struct llm_build_context {
 
     void init() {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ buf_compute.size,
-            /*.mem_buffer =*/ buf_compute.data,
-            /*.no_alloc   =*/ true,
+            .mem_size   = buf_compute.size,
+            .mem_buffer = buf_compute.data,
+            .no_alloc   = true,
         };
 
         ctx0 = ggml_init(params);
@@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {
 
     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_init_params params = { .mem_size = 0,
+                                           .mem_buffer = NULL,
+                                           .no_alloc = false
+        };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
@@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     if (kv_buf_size) {
         const size_t elt_size = ggml_element_size(kv_self.k);
 
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip = {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = /* no_alloc */ true
+        };
+
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
 
         const size_t elt_size = ggml_element_size(kv_self.k);
 
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = true };
+
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
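
Nearly every hunk above applies the same mechanical rewrite: the comment-annotated positional initializers that upstream ggml uses (/*.mem_size =*/ value) become real designated initializers (.mem_size = value) now that ggml.cpp, ggml-alloc.cpp, and ggml-backend.cpp are compiled as C++ in this branch. Designated initializers are standard in C99 but only arrived in C++ with C++20, and in C++ the designators must follow the declaration order of the struct, which the hunks above respect. A self-contained sketch of the before/after pattern, using a stand-in my_init_params struct rather than the real ggml_init_params:

// Standalone sketch of the initializer rewrite applied throughout this patch.
// my_init_params is a stand-in with the same fields as ggml_init_params; it is
// not the real ggml type. Compile as C++20, e.g. g++ -std=c++20 demo.cpp
#include <cstddef>
#include <cstdio>

struct my_init_params {
    size_t mem_size;   // bytes
    void * mem_buffer; // if NULL, memory will be allocated internally
    bool   no_alloc;   // don't allocate memory for the tensor data
};

int main() {
    // old style: positional values, field names present only as comments
    struct my_init_params before = {
        /*.mem_size   =*/ 128ull*1024ull*1024ull,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };

    // new style: C++20 designated initializers; the compiler now checks the
    // field/value pairing that the comments used to merely claim, and the
    // designators must appear in declaration order
    struct my_init_params after = {
        .mem_size   = 128ull*1024ull*1024ull,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    std::printf("before: %zu bytes, after: %zu bytes\n", before.mem_size, after.mem_size);
    return 0;
}

The remaining non-mechanical changes are small: ggml_gallocr_new() builds its hash_set from a named hs temporary instead of {0}, and the two llama.cpp state-copy paths hoist the anonymous ggml_init argument into a named ip local.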
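The other recurring change is in ggml.h: each public struct gains refl::attr::usage::type as a base class, and a new #include is added at the top of the header, alongside the magic_enum reference in ggml.cpp. refl::attr::usage::type is the marker base that the refl-cpp library uses for attributes applicable to types, so the intent is presumably to expose the ggml structs to compile-time reflection. Below is a minimal sketch of that idea under the assumption that refl-cpp (refl.hpp) is the library in question; demo_params and its REFL_AUTO registration are illustrative only and are not part of the patch.

// Sketch only: assumes the refl-cpp single header (refl.hpp) is on the
// include path. demo_params mirrors the pattern the patch applies in ggml.h.
#include <refl.hpp>
#include <cstddef>
#include <iostream>

struct demo_params : refl::attr::usage::type {
    size_t mem_size;
    bool   no_alloc;
};

// refl-cpp still needs an explicit registration of the members
REFL_AUTO(
    type(demo_params),
    field(mem_size),
    field(no_alloc)
)

int main() {
    // iterate over the registered members and print their names
    refl::util::for_each(refl::reflect<demo_params>().members, [](auto member) {
        std::cout << member.name.c_str() << '\n';
    });
    return 0;
}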