mike dupont 2023-11-22 09:04:00 -05:00
parent 6f8adf99d5
commit 6fd690fae7
12 changed files with 116 additions and 97 deletions
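The change is mechanical: the comment-style field annotations used throughout ggml (`/*.mem_size =*/ value`) become real C99/C++20 designated initializers (`.mem_size = value`), and the structs in ggml.h are given refl-cpp attribute bases. A minimal sketch of the pattern, using the ggml_init_params fields that appear in the first hunk below:

    // before: positional initialization; the field names are only comments,
    // so reordering the struct silently breaks every call site
    struct ggml_init_params params = {
        /*.mem_size   =*/ compute_size,
        /*.mem_buffer =*/ compute_addr,
        /*.no_alloc   =*/ false,
    };

    // after: designated initializers; the compiler checks each field name
    struct ggml_init_params params = {
        .mem_size   = compute_size,
        .mem_buffer = compute_addr,
        .no_alloc   = false,
    };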


@@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
     for (int ex=0; ex<n_examples; ++ex) {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc   =*/ false,
+            .mem_size   = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc   = false,
         };
         struct ggml_context * ctx0 = ggml_init(params);
@@ -1603,9 +1603,9 @@ int main(int argc, char ** argv) {
     printf("---\n");
     for (int i=0; i<n_gen; ++i) {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc   =*/ false,
+            .mem_size   = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc   = false,
         };
         struct ggml_context * ctx0 = ggml_init(params);


@@ -141,9 +141,9 @@ int main(int argc, char ** argv) {
     printf("Allocating Memory of size %zi bytes, %zi MB\n", ctx_size, (ctx_size/1024/1024));
     struct ggml_init_params params = {
-        /*.mem_size   =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc   =*/ 0
+        .mem_size   = ctx_size,
+        .mem_buffer = NULL,
+        .no_alloc   = 0
    };
    ctx = ggml_init(params);


@@ -554,8 +554,8 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
     struct ggml_context * ctx_data = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
     struct gguf_context * ctx = gguf_init_from_file(filename, params);


@@ -295,8 +295,8 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
     // get parameters directly from gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
         struct gguf_context * mctx = gguf_init_from_file(fn_model, params);
@@ -1709,9 +1709,9 @@ int main(int argc, char ** argv) {
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@@ -1738,9 +1738,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
@@ -1905,9 +1905,9 @@ int main(int argc, char ** argv) {
     // context for work buffer
     struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size, // mem_size
+        .mem_buffer = NULL,          // mem_buffer
+        .no_alloc   = false,         // no_alloc
     };
     struct ggml_context * ctx_work = ggml_init(ctx_work_params);


@@ -41,9 +41,9 @@ static bool gguf_ex_write(const std::string & fname) {
     gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
     struct ggml_init_params params = {
-        /*.mem_size   =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc   =*/ false,
+        .mem_size   = 128ull*1024ull*1024ull,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
     struct ggml_context * ctx_data = ggml_init(params);
@@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        .no_alloc = false,
+        .ctx      = NULL,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
     struct ggml_context * ctx_data = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        .no_alloc = false,
+        .ctx      = &ctx_data,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);


@@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
     const auto & buf_compute = ctx->buf_compute;
     struct ggml_init_params params = {
-        /*.mem_size   =*/ buf_compute.size,
-        /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        .mem_size   = buf_compute.size,
+        .mem_buffer = buf_compute.data,
+        .no_alloc   = false,
     };
     params.no_alloc = true;
@@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        .no_alloc = true,
+        .ctx      = &meta,
     };
     struct gguf_context * ctx = gguf_init_from_file(fname, params);
@@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     // load tensors
     {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ ctx_size,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc   =*/ false,
+            .mem_size   = ctx_size,
+            .mem_buffer = NULL,
+            .no_alloc   = false,
         };
         new_clip->ctx = ggml_init(params);


@@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
     // set vocab by copying from vocab_model gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx      = */ NULL,
+            .no_alloc = false,
+            .ctx      = NULL,
         };
         struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);
@@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {
     // context for input tensors without their data
     struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL,                       // mem_buffer
-        true,                       // no_alloc
+        .mem_size   = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL,                       // mem_buffer
+        .no_alloc   = true,                       // no_alloc
     };
     struct ggml_context * ctx_input = ggml_init(ctx_input_params);
@@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
         (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
     );
     struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL,                           // mem_buffer
-        true,                           // no_alloc
+        .mem_size   = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer = NULL,                           // mem_buffer
+        .no_alloc   = true,                           // no_alloc
     };
     struct ggml_context * ctx_compute = NULL;
@@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {
     // context for work buffer
     struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL,          // mem_buffer
-        false,         // no_alloc
+        .mem_size   = max_work_size,
+        .mem_buffer = NULL,
+        .no_alloc   = false,
     };
     struct ggml_context * ctx_work = ggml_init(ctx_work_params);


@@ -351,15 +351,17 @@ struct ggml_gallocr {
 ggml_gallocr_t ggml_gallocr_new(void) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));
+    ggml_hash_set hs = { .size = 0, .keys = NULL };
     *galloc = (struct ggml_gallocr) {
-        /*.talloc           = */ NULL,
-        /*.hash_set         = */ {0},
-        /*.hash_values      = */ NULL,
-        /*.hash_values_size = */ 0,
-        /*.hash_allocs      = */ NULL,
-        /*.parse_seq        = */ NULL,
-        /*.parse_seq_len    = */ 0,
+        .talloc           = NULL,
+        .hash_set         = hs,
+        .hash_values      = NULL,
+        .hash_values_size = 0,
+        .hash_allocs      = NULL,
+        .parse_seq        = NULL,
+        .parse_seq_len    = 0,
     };
+    //((*galloc).hash_set)[0] = 0;
     return galloc;
 }
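One non-mechanical spot in the hunk above: hash_set is itself a struct, so the old positional {0} cannot simply gain a field name. The commit routes it through a named temporary hs, presumably to keep the conversion simple; C99 and C++20 would also accept nesting the designated list inline. A sketch under that assumption (remaining fields as in the hunk):

    *galloc = (struct ggml_gallocr) {
        .talloc   = NULL,
        .hash_set = { .size = 0, .keys = NULL }, // inline alternative to the hs temporary
        // ... .hash_values through .parse_seq_len as above ...
    };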
@@ -706,8 +708,8 @@ struct ggml_allocr {
 static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
     ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
     *alloc = (struct ggml_allocr) {
-        /*.talloc = */ talloc,
-        /*.galloc = */ ggml_gallocr_new(),
+        .talloc = talloc,
+        .galloc = ggml_gallocr_new(),
     };
     return alloc;
 }


@@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
     sched->n_splits = 0;
     struct ggml_init_params params = {
-        /*.mem_size   = */ sizeof(sched->context_buffer),
-        /*.mem_buffer = */ sched->context_buffer,
-        /*.no_alloc   = */ true
+        .mem_size   = sizeof(sched->context_buffer),
+        .mem_buffer = sched->context_buffer,
+        .no_alloc   = true
     };
     if (sched->ctx != NULL) {


@@ -2,6 +2,8 @@
+//https://github.com/Neargye/magic_enum.git
+#include <magic_enum.hpp>
 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC
@@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         // all other threads are finished and spinning
         // do finalize and init here so we don't have synchronize again
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_FINALIZE,
-            /*.ith   =*/ 0,
-            /*.nth   =*/ 0,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_FINALIZE,
+            .ith   = 0,
+            .nth   = 0,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
         if (node_n != -1) {
@@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_COMPUTE,
-            /*.ith   =*/ state->ith,
-            /*.nth   =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type  = GGML_TASK_COMPUTE,
+            .ith   = state->ith,
+            .nth   = n_tasks,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
         };
         if (state->ith < n_tasks) {
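The include added at the top of this file pulls in magic_enum, a header-only enum-reflection library; enum_name is its documented entry point. A self-contained sketch of what it provides for code like the hunks above (the enum here is a local stand-in, not ggml's own definition):

    #include <magic_enum.hpp>
    #include <iostream>

    enum class task_type { GGML_TASK_INIT, GGML_TASK_COMPUTE, GGML_TASK_FINALIZE };

    int main() {
        // prints "GGML_TASK_COMPUTE" without a hand-written switch
        std::cout << magic_enum::enum_name(task_type::GGML_TASK_COMPUTE) << "\n";
        return 0;
    }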

ggml.h

@@ -1,5 +1,6 @@
 #pragma once
+#include <refl-cpp/refl.hpp>
 //
 // GGML Tensor Library
 //
@@ -465,7 +466,7 @@ extern "C" {
     };
     // ggml object
-    struct ggml_object {
+    struct ggml_object : refl::attr::usage::type {
         size_t offs;
         size_t size;
@@ -479,7 +480,7 @@ extern "C" {
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
     // n-dimensional tensor
-    struct ggml_tensor {
+    struct ggml_tensor : refl::attr::usage::type {
         enum ggml_type type;
         enum ggml_backend_type backend;
@@ -524,7 +525,7 @@ extern "C" {
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
+    struct ggml_cplan : refl::attr::usage::type {
         size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
@@ -541,13 +542,13 @@ extern "C" {
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
-    struct ggml_hash_set {
+    struct ggml_hash_set : refl::attr::usage::type {
         size_t size;
         struct ggml_tensor ** keys;
     };
     // computation graph
-    struct ggml_cgraph {
+    struct ggml_cgraph : refl::attr::usage::type {
         int size;
         int n_nodes;
         int n_leafs;
@@ -567,7 +568,7 @@ extern "C" {
     };
     // scratch buffer
-    struct ggml_scratch {
+    struct ggml_scratch : refl::attr::usage::type {
         size_t offs;
         size_t size;
         void * data;
@@ -579,7 +580,7 @@ extern "C" {
         {}
     };
-    struct ggml_init_params {
+    struct ggml_init_params : refl::attr::usage::type {
         // memory pool
         size_t mem_size;   // bytes
         void * mem_buffer; // if NULL, memory will be allocated internally
@@ -597,7 +598,7 @@ extern "C" {
         GGML_TASK_FINALIZE,
     };
-    struct ggml_compute_params {
+    struct ggml_compute_params : refl::attr::usage::type {
         enum ggml_task_type type;
         // ith = thread index, nth = number of threads
@@ -1835,7 +1836,7 @@ extern "C" {
     //
     // see ggml.c (ggml_opt_default_params) for default values
     //
-    struct ggml_opt_params {
+    struct ggml_opt_params : refl::attr::usage::type {
         enum ggml_opt_type type;
         size_t graph_size;
@@ -1865,7 +1866,7 @@ extern "C" {
         int n_gradient_accumulation;
         // ADAM parameters
-        struct {
+        struct ggml_adam : refl::attr::usage::type {
            int n_iter;
            float sched; // schedule multiplier (fixed, decay or warmup)
@@ -1881,7 +1882,7 @@ extern "C" {
         } adam;
         // LBFGS parameters
-        struct {
+        struct ggml_lbfgs : refl::attr::usage::type {
            int m; // number of corrections to approximate the inv. Hessian
            int n_iter;
            int max_linesearch;
@@ -1896,7 +1897,7 @@ extern "C" {
         } lbfgs;
     };
-    struct ggml_opt_context {
+    struct ggml_opt_context : refl::attr::usage::type {
         struct ggml_context * ctx;
         struct ggml_opt_params params;
@@ -1908,7 +1909,7 @@ extern "C" {
         float loss_before;
         float loss_after;
-        struct {
+        struct ggml_grad : refl::attr::usage::type {
            struct ggml_tensor * g; // current gradient
            struct ggml_tensor * m; // first moment
            struct ggml_tensor * v; // second moment
@@ -1918,7 +1919,7 @@ extern "C" {
            int n_no_improvement;
         } adam;
-        struct {
+        struct ggml_params : refl::attr::usage::type {
            struct ggml_tensor * x;  // current parameters
            struct ggml_tensor * xp; // previous parameters
            struct ggml_tensor * g;  // current gradient
@@ -2011,7 +2012,7 @@ extern "C" {
     struct gguf_context;
-    struct gguf_init_params {
+    struct gguf_init_params : refl::attr::usage::type {
         bool no_alloc;
         // if not NULL, create a ggml_context and allocate the tensor data in it
@@ -2148,7 +2149,7 @@ extern "C" {
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
     typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
-    typedef struct {
+    typedef struct ggml_something : refl::attr::usage::type {
         const char * type_name;
         int blck_size;
         size_t type_size;


@@ -1786,8 +1786,8 @@ struct llama_model_loader {
     llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
         struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ &ctx_meta,
+            .no_alloc = true,
+            .ctx      = &ctx_meta,
         };
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
@@ -2677,9 +2677,9 @@ static void llm_load_tensors(
     }
     struct ggml_init_params params = {
-        /*.mem_size   =*/ model.buf.size,
-        /*.mem_buffer =*/ model.buf.data,
-        /*.no_alloc   =*/ ml.use_mmap,
+        .mem_size   = model.buf.size,
+        .mem_buffer = model.buf.data,
+        .no_alloc   = ml.use_mmap,
     };
     model.ctx = ggml_init(params);
@@ -3843,9 +3843,9 @@ struct llm_build_context {
     void init() {
         struct ggml_init_params params = {
-            /*.mem_size   =*/ buf_compute.size,
-            /*.mem_buffer =*/ buf_compute.data,
-            /*.no_alloc   =*/ true,
+            .mem_size   = buf_compute.size,
+            .mem_buffer = buf_compute.data,
+            .no_alloc   = true,
         };
         ctx0 = ggml_init(params);
@@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {
     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_init_params params = { .mem_size   = 0,
+                                           .mem_buffer = NULL,
+                                           .no_alloc   = false
+        };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
@@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
     if (kv_buf_size) {
         const size_t elt_size = ggml_element_size(kv_self.k);
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip = {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = /* no_alloc */ true
+        };
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const size_t elt_size = ggml_element_size(kv_self.k);
-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip {
+            .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer = NULL,
+            .no_alloc   = true };
+        ggml_context * cpy_ctx = ggml_init(ip);
         ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
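A note on the last two hunks: the named ip variable is one way to do it, but C++20 also accepts designated initializers directly in the braced call argument, which would keep each site a single statement. A sketch under that assumption, with the same fields and values as the hunks above:

    ggml_context * cpy_ctx = ggml_init({
        .mem_size   = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
        .mem_buffer = NULL,
        .no_alloc   = true,
    });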