commit 6fd690fae7 (parent 6f8adf99d5)
Author: mike dupont
Date: 2023-11-22 09:04:00 -05:00

Convert ggml/llama.cpp struct initializers from comment-annotated positional form to C99/C++20 designated initializers, and start tagging the ggml.h structs with refl-cpp reflection attributes.

12 changed files with 116 additions and 97 deletions

View file

@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
for (int ex=0; ex<n_examples; ++ex) {
struct ggml_init_params params = {
/*.mem_size =*/ compute_size,
/*.mem_buffer =*/ compute_addr,
/*.no_alloc =*/ false,
.mem_size = compute_size,
.mem_buffer = compute_addr,
.no_alloc = false,
};
struct ggml_context * ctx0 = ggml_init(params);
@ -1603,9 +1603,9 @@ int main(int argc, char ** argv) {
printf("---\n");
for (int i=0; i<n_gen; ++i) {
struct ggml_init_params params = {
/*.mem_size =*/ compute_size,
/*.mem_buffer =*/ compute_addr,
/*.no_alloc =*/ false,
.mem_size = compute_size,
.mem_buffer = compute_addr,
.no_alloc = false,
};
struct ggml_context * ctx0 = ggml_init(params);
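The pattern applied in these hunks repeats throughout the commit: positional aggregate initializers whose field names survive only as comments are rewritten as designated initializers, so the compiler checks each name against the struct definition. A minimal self-contained sketch of the idea (the struct below is a stand-in with the same field names as ggml_init_params; the pool size is made up):

    #include <cstddef>

    // Stand-in for ggml_init_params, for illustration only.
    struct ggml_init_params {
        size_t mem_size;    // bytes
        void * mem_buffer;  // if NULL, memory is allocated internally
        bool   no_alloc;
    };

    int main() {
        // Designated initializers (C99, and C++20 for C++ translation units):
        // a renamed or reordered field becomes a compile error instead of a
        // silently mis-initialized member.
        struct ggml_init_params params = {
            .mem_size   = 16u * 1024u * 1024u,  // hypothetical 16 MiB pool
            .mem_buffer = nullptr,
            .no_alloc   = false,
        };
        return params.no_alloc ? 1 : 0;
    }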

View file

@ -141,9 +141,9 @@ int main(int argc, char ** argv) {
printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
struct ggml_init_params params = {
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/* no_alloc =*/ 0
.mem_size = ctx_size,
.mem_buffer = NULL,
.no_alloc = 0
};
ctx = ggml_init(params);

View file

@ -554,8 +554,8 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
struct ggml_context * ctx_data = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ &ctx_data,
.no_alloc = false,
.ctx = &ctx_data,
};
struct gguf_context * ctx = gguf_init_from_file(filename, params);
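For context on the gguf_init_params pair initialized here: no_alloc controls whether tensor data is loaded (false loads it, true reads only metadata), and ctx, when non-NULL, receives a ggml_context holding the tensors. A hedged usage sketch (the file path is a placeholder; the calls are the public gguf API declared in ggml.h):

    #include <cstdio>
    #include "ggml.h"

    int main() {
        struct ggml_context * ctx_data = NULL;
        struct gguf_init_params params = {
            .no_alloc = false,      // load tensor data, not just metadata
            .ctx      = &ctx_data,  // receive a ggml_context with the tensors
        };
        // "model.gguf" is a placeholder path.
        struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
        if (ctx == NULL) {
            return 1;
        }
        printf("tensors: %d\n", (int) gguf_get_n_tensors(ctx));
        gguf_free(ctx);
        ggml_free(ctx_data);
        return 0;
    }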

View file

@ -295,8 +295,8 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
// get parameters directly from gguf file
{
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ NULL,
.no_alloc = false,
.ctx = NULL,
};
struct gguf_context * mctx = gguf_init_from_file(fn_model, params);
@ -1709,9 +1709,9 @@ int main(int argc, char ** argv) {
// context for input tensors without their data
struct ggml_init_params ctx_input_params = {
ggml_tensor_overhead() * 2, // mem_size
NULL, // mem_buffer
true, // no_alloc
.mem_size = ggml_tensor_overhead() * 2,
.mem_buffer = NULL,
.no_alloc = true,
};
struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@ -1738,9 +1738,9 @@ int main(int argc, char ** argv) {
(params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
);
struct ggml_init_params ctx_compute_params = {
estimated_compute_size_wo_data, // mem_size
NULL, // mem_buffer
true, // no_alloc
.mem_size = estimated_compute_size_wo_data,
.mem_buffer = NULL,
.no_alloc = true,
};
struct ggml_context * ctx_compute = NULL;
@ -1905,9 +1905,9 @@ int main(int argc, char ** argv) {
// context for work buffer
struct ggml_init_params ctx_work_params = {
max_work_size, // mem_size
NULL, // mem_buffer
false, // no_alloc
.mem_size = max_work_size,
.mem_buffer = NULL,
.no_alloc = false,
};
struct ggml_context * ctx_work = ggml_init(ctx_work_params);

View file

@ -41,9 +41,9 @@ static bool gguf_ex_write(const std::string & fname) {
gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
struct ggml_init_params params = {
/*.mem_size =*/ 128ull*1024ull*1024ull,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
.mem_size = 128ull*1024ull*1024ull,
.mem_buffer = NULL,
.no_alloc = false,
};
struct ggml_context * ctx_data = ggml_init(params);
@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
// just read tensor info
static bool gguf_ex_read_0(const std::string & fname) {
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ NULL,
.no_alloc = false,
.ctx = NULL,
};
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
struct ggml_context * ctx_data = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ &ctx_data,
.no_alloc = false,
.ctx = &ctx_data,
};
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

View file

@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
const auto & buf_compute = ctx->buf_compute;
struct ggml_init_params params = {
/*.mem_size =*/ buf_compute.size,
/*.mem_buffer =*/ buf_compute.data,
/*.no_alloc =*/ false,
.mem_size = buf_compute.size,
.mem_buffer = buf_compute.data,
.no_alloc = false,
};
params.no_alloc = true;
@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
struct ggml_context * meta = NULL;
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &meta,
.no_alloc = true,
.ctx = &meta,
};
struct gguf_context * ctx = gguf_init_from_file(fname, params);
@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
// load tensors
{
struct ggml_init_params params = {
/*.mem_size =*/ ctx_size,
/*.mem_buffer =*/ NULL,
/*.no_alloc =*/ false,
.mem_size = ctx_size,
.mem_buffer = NULL,
.no_alloc = false,
};
new_clip->ctx = ggml_init(params);

View file

@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
// set vocab by copying from vocab_model gguf file
{
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ NULL,
.no_alloc = false,
.ctx = NULL,
};
struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {
// context for input tensors without their data
struct ggml_init_params ctx_input_params = {
ggml_tensor_overhead() * 2, // mem_size
NULL, // mem_buffer
true, // no_alloc
.mem_size = ggml_tensor_overhead() * 2,
.mem_buffer = NULL,
.no_alloc = true,
};
struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
(params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
);
struct ggml_init_params ctx_compute_params = {
estimated_compute_size_wo_data, // mem_size
NULL, // mem_buffer
true, // no_alloc
.mem_size = estimated_compute_size_wo_data,
.mem_buffer = NULL,
.no_alloc = true,
};
struct ggml_context * ctx_compute = NULL;
@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {
// context for work buffer
struct ggml_init_params ctx_work_params = {
max_work_size, // mem_size
NULL, // mem_buffer
false, // no_alloc
.mem_size = max_work_size,
.mem_buffer = NULL,
.no_alloc = false,
};
struct ggml_context * ctx_work = ggml_init(ctx_work_params);

View file

@ -351,15 +351,17 @@ struct ggml_gallocr {
ggml_gallocr_t ggml_gallocr_new(void) {
ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));
ggml_hash_set hs = { .size = 0, .keys = NULL };
*galloc = (struct ggml_gallocr) {
/*.talloc = */ NULL,
/*.hash_set = */ {0},
/*.hash_values = */ NULL,
/*.hash_values_size = */ 0,
/*.hash_allocs = */ NULL,
/*.parse_seq = */ NULL,
/*.parse_seq_len = */ 0,
.talloc = NULL,
.hash_set = hs,
.hash_values = NULL,
.hash_values_size = 0,
.hash_allocs = NULL,
.parse_seq = NULL,
.parse_seq_len = 0,
};
//((*galloc).hash_set)[0] = 0;
return galloc;
}
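The named hs temporary above replaces the old positional {0} for the nested ggml_hash_set member. As a side note, C++20 also accepts a nested designated initializer directly, so no named temporary is strictly needed; a sketch of that alternative with toy stand-in types (not the commit's code):

    #include <cstddef>

    // Toy stand-ins for ggml_hash_set / ggml_gallocr, for illustration only.
    struct hash_set { size_t size; void ** keys; };
    struct gallocr  { void * talloc; hash_set hash_set_member; void * hash_values; };

    int main() {
        // Nested designated initializer: the inner braces initialize the
        // aggregate member in place.
        gallocr g = {
            .talloc          = nullptr,
            .hash_set_member = { .size = 0, .keys = nullptr },
            .hash_values     = nullptr,
        };
        return g.hash_set_member.size == 0 ? 0 : 1;
    }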
@ -706,8 +708,8 @@ struct ggml_allocr {
static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
*alloc = (struct ggml_allocr) {
/*.talloc = */ talloc,
/*.galloc = */ ggml_gallocr_new(),
.talloc = talloc,
.galloc = ggml_gallocr_new(),
};
return alloc;
}

View file

@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
sched->n_splits = 0;
struct ggml_init_params params = {
/*.mem_size = */ sizeof(sched->context_buffer),
/*.mem_buffer = */ sched->context_buffer,
/*.no_alloc = */ true
.mem_size = sizeof(sched->context_buffer),
.mem_buffer = sched->context_buffer,
.no_alloc = true
};
if (sched->ctx != NULL) {

View file

@ -2,6 +2,8 @@
//https://github.com/Neargye/magic_enum.git
#include <magic_enum.hpp>
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#define _USE_MATH_DEFINES // For M_PI on MSVC
@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
// all other threads are finished and spinning
// do finalize and init here so we don't have synchronize again
struct ggml_compute_params params = {
/*.type =*/ GGML_TASK_FINALIZE,
/*.ith =*/ 0,
/*.nth =*/ 0,
/*.wsize =*/ cplan->work_size,
/*.wdata =*/ cplan->work_data,
.type = GGML_TASK_FINALIZE,
.ith = 0,
.nth = 0,
.wsize = cplan->work_size,
.wdata = cplan->work_data,
};
if (node_n != -1) {
@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
const int n_tasks = ggml_get_n_tasks(node, n_threads);
struct ggml_compute_params params = {
/*.type =*/ GGML_TASK_COMPUTE,
/*.ith =*/ state->ith,
/*.nth =*/ n_tasks,
/*.wsize =*/ cplan->work_size,
/*.wdata =*/ cplan->work_data,
.type = GGML_TASK_COMPUTE,
.ith = state->ith,
.nth = n_tasks,
.wsize = cplan->work_size,
.wdata = cplan->work_data,
};
if (state->ith < n_tasks) {

ggml.h (33 changed lines)
View file

@ -1,5 +1,6 @@
#pragma once
#include<refl-cpp/refl.hpp>
//
// GGML Tensor Library
//
@ -465,7 +466,7 @@ extern "C" {
};
// ggml object
struct ggml_object {
struct ggml_object : refl::attr::usage::type {
size_t offs;
size_t size;
@ -479,7 +480,7 @@ extern "C" {
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
// n-dimensional tensor
struct ggml_tensor {
struct ggml_tensor : refl::attr::usage::type{
enum ggml_type type;
enum ggml_backend_type backend;
@ -524,7 +525,7 @@ extern "C" {
// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggerganov/ggml/issues/287
struct ggml_cplan {
struct ggml_cplan : refl::attr::usage::type{
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@ -541,13 +542,13 @@ extern "C" {
GGML_CGRAPH_EVAL_ORDER_COUNT
};
struct ggml_hash_set {
struct ggml_hash_set : refl::attr::usage::type{
size_t size;
struct ggml_tensor ** keys;
};
// computation graph
struct ggml_cgraph {
struct ggml_cgraph : refl::attr::usage::type{
int size;
int n_nodes;
int n_leafs;
@ -567,7 +568,7 @@ extern "C" {
};
// scratch buffer
struct ggml_scratch {
struct ggml_scratch : refl::attr::usage::type{
size_t offs;
size_t size;
void * data;
@ -579,7 +580,7 @@ extern "C" {
{}
};
struct ggml_init_params {
struct ggml_init_params : refl::attr::usage::type{
// memory pool
size_t mem_size; // bytes
void * mem_buffer; // if NULL, memory will be allocated internally
@ -597,7 +598,7 @@ extern "C" {
GGML_TASK_FINALIZE,
};
struct ggml_compute_params {
struct ggml_compute_params : refl::attr::usage::type{
enum ggml_task_type type;
// ith = thread index, nth = number of threads
@ -1835,7 +1836,7 @@ extern "C" {
//
// see ggml.c (ggml_opt_default_params) for default values
//
struct ggml_opt_params {
struct ggml_opt_params : refl::attr::usage::type{
enum ggml_opt_type type;
size_t graph_size;
@ -1865,7 +1866,7 @@ extern "C" {
int n_gradient_accumulation;
// ADAM parameters
struct {
struct ggml_adam: refl::attr::usage::type{
int n_iter;
float sched; // schedule multiplier (fixed, decay or warmup)
@ -1881,7 +1882,7 @@ extern "C" {
} adam;
// LBFGS parameters
struct {
struct ggml_lbfgs: refl::attr::usage::type{
int m; // number of corrections to approximate the inv. Hessian
int n_iter;
int max_linesearch;
@ -1896,7 +1897,7 @@ extern "C" {
} lbfgs;
};
struct ggml_opt_context {
struct ggml_opt_context : refl::attr::usage::type{
struct ggml_context * ctx;
struct ggml_opt_params params;
@ -1908,7 +1909,7 @@ extern "C" {
float loss_before;
float loss_after;
struct {
struct ggml_grad : refl::attr::usage::type{
struct ggml_tensor * g; // current gradient
struct ggml_tensor * m; // first moment
struct ggml_tensor * v; // second moment
@ -1918,7 +1919,7 @@ extern "C" {
int n_no_improvement;
} adam;
struct {
struct ggml_params : refl::attr::usage::type{
struct ggml_tensor * x; // current parameters
struct ggml_tensor * xp; // previous parameters
struct ggml_tensor * g; // current gradient
@ -2011,7 +2012,7 @@ extern "C" {
struct gguf_context;
struct gguf_init_params {
struct gguf_init_params : refl::attr::usage::type{
bool no_alloc;
// if not NULL, create a ggml_context and allocate the tensor data in it
@ -2148,7 +2149,7 @@ extern "C" {
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
typedef struct {
typedef struct ggml_something : refl::attr::usage::type{
const char * type_name;
int blck_size;
size_t type_size;
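The ggml.h change pulls in refl-cpp and tags the core structs with refl::attr::usage::type, but it does not yet register any members for reflection. For readers unfamiliar with the library, below is a sketch of the usual registration-and-iteration flow; the struct and field names are toy stand-ins, and only the include path is taken from the commit.

    #include <cstddef>
    #include <iostream>
    #include <refl-cpp/refl.hpp>   // include path as used in the commit

    // Toy struct standing in for a ggml parameter block (illustrative only).
    struct my_params {
        size_t mem_size;
        bool   no_alloc;
    };

    // Register the members so refl-cpp can see them at compile time.
    REFL_AUTO(type(my_params), field(mem_size), field(no_alloc))

    int main() {
        my_params p{ .mem_size = 1024, .no_alloc = true };
        // Iterate the registered members and print "name = value" for each.
        refl::util::for_each(refl::reflect<my_params>().members, [&](auto member) {
            std::cout << member.name.c_str() << " = " << member(p) << '\n';
        });
        return 0;
    }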

View file

@ -1786,8 +1786,8 @@ struct llama_model_loader {
llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &ctx_meta,
.no_alloc = true,
.ctx = &ctx_meta,
};
ctx_gguf = gguf_init_from_file(fname.c_str(), params);
@ -2677,9 +2677,9 @@ static void llm_load_tensors(
}
struct ggml_init_params params = {
/*.mem_size =*/ model.buf.size,
/*.mem_buffer =*/ model.buf.data,
/*.no_alloc =*/ ml.use_mmap,
.mem_size = model.buf.size,
.mem_buffer = model.buf.data,
.no_alloc = ml.use_mmap,
};
model.ctx = ggml_init(params);
@ -3843,9 +3843,9 @@ struct llm_build_context {
void init() {
struct ggml_init_params params = {
/*.mem_size =*/ buf_compute.size,
/*.mem_buffer =*/ buf_compute.data,
/*.no_alloc =*/ true,
.mem_size = buf_compute.size,
.mem_buffer = buf_compute.data,
.no_alloc = true,
};
ctx0 = ggml_init(params);
@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {
// needed to initialize f16 tables
{
struct ggml_init_params params = { 0, NULL, false };
struct ggml_init_params params = {
    .mem_size = 0,
    .mem_buffer = NULL,
    .no_alloc = false,
};
struct ggml_context * ctx = ggml_init(params);
ggml_free(ctx);
}
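Here the positional { 0, NULL, false } initializer becomes a field-by-field designated one held in a named variable. Under C++20 the designated form can also be passed inline as a temporary; a sketch of both spellings, using stand-in types so it stays self-contained:

    #include <cstddef>

    // Stand-ins for ggml_init_params / ggml_init, for illustration only.
    struct init_params { size_t mem_size; void * mem_buffer; bool no_alloc; };
    static int fake_init(init_params p) { return p.no_alloc ? 1 : 0; }

    int main() {
        // 1) Named variable, as in the commit.
        init_params params = { .mem_size = 0, .mem_buffer = nullptr, .no_alloc = false };
        int a = fake_init(params);

        // 2) Designated-initialized temporary passed directly (also valid C++20).
        int b = fake_init({ .mem_size = 0, .mem_buffer = nullptr, .no_alloc = false });

        return a + b;
    }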
@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
if (kv_buf_size) {
const size_t elt_size = ggml_element_size(kv_self.k);
ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
ggml_init_params ip = {
    .mem_size = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
    .mem_buffer = NULL,
    .no_alloc = true,
};
ggml_context * cpy_ctx = ggml_init(ip);
ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
const size_t elt_size = ggml_element_size(kv_self.k);
ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
ggml_init_params ip = {
    .mem_size = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
    .mem_buffer = NULL,
    .no_alloc = true,
};
ggml_context * cpy_ctx = ggml_init(ip);
ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);