running

commit 6fd690fae7
parent 6f8adf99d5

12 changed files with 116 additions and 97 deletions
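The recurring change in this commit is mechanical: comment-annotated positional initializers (the upstream C style) are rewritten as designated initializers. Below is a minimal compilable sketch of the before/after pattern, using a stand-in struct rather than the real ggml_init_params; names here are illustrative only.

#include <cstddef>

// stand-in struct mirroring the shape of ggml_init_params (not the real definition)
struct init_params {
    size_t mem_size;
    void * mem_buffer;
    bool   no_alloc;
};

int main() {
    // old style: positional initializers, field names carried only in comments
    init_params a = {
        /*.mem_size   =*/ 1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    // new style: designated initializers (C99; C++20 or a compiler extension in C++),
    // checked by the compiler against the member names
    init_params b = {
        .mem_size   = 1024,
        .mem_buffer = nullptr,
        .no_alloc   = false,
    };
    (void)a; (void)b;
    return 0;
}

Note that in C++ (unlike C99) the designators must appear in declaration order and cannot be mixed with positional initializers, which is why the converted blocks below keep the original field order.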
@@ -1528,9 +1528,9 @@ int main(int argc, char ** argv) {
     for (int ex=0; ex<n_examples; ++ex) {
         struct ggml_init_params params = {
-            /*.mem_size =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc =*/ false,
+            .mem_size = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc = false,
         };

         struct ggml_context * ctx0 = ggml_init(params);

@@ -1603,9 +1603,9 @@ int main(int argc, char ** argv) {
     printf("---\n");
     for (int i=0; i<n_gen; ++i) {
         struct ggml_init_params params = {
-            /*.mem_size =*/ compute_size,
-            /*.mem_buffer =*/ compute_addr,
-            /*.no_alloc =*/ false,
+            .mem_size = compute_size,
+            .mem_buffer = compute_addr,
+            .no_alloc = false,
         };
         struct ggml_context * ctx0 = ggml_init(params);

@@ -141,9 +141,9 @@ int main(int argc, char ** argv) {
     printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));

     struct ggml_init_params params = {
-        /*.mem_size =*/ ctx_size,
-        /*.mem_buffer =*/ NULL,
-        /* no_alloc =*/ 0
+        .mem_size = ctx_size,
+        .mem_buffer = NULL,
+        .no_alloc = 0
     };

     ctx = ggml_init(params);

@@ -554,8 +554,8 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ &ctx_data,
+        .no_alloc = false,
+        .ctx = &ctx_data,
     };

     struct gguf_context * ctx = gguf_init_from_file(filename, params);

@@ -295,8 +295,8 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
     // get parameters directly from gguf file
     {
         struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx = */ NULL,
+            .no_alloc = false,
+            .ctx = NULL,
         };
         struct gguf_context * mctx = gguf_init_from_file(fn_model, params);

@@ -1709,9 +1709,9 @@ int main(int argc, char ** argv) {

    // context for input tensors without their data
    struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL, // mem_buffer
-        true, // no_alloc
+        .mem_size= ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer=NULL, // mem_buffer
+        .no_alloc=true, // no_alloc
    };
    struct ggml_context * ctx_input = ggml_init(ctx_input_params);

@@ -1738,9 +1738,9 @@ int main(int argc, char ** argv) {
        (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
    struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL, // mem_buffer
-        true, // no_alloc
+        .mem_size=estimated_compute_size_wo_data, // mem_size
+        .mem_buffer=NULL, // mem_buffer
+        .no_alloc=true, // no_alloc
    };
    struct ggml_context * ctx_compute = NULL;

@@ -1905,9 +1905,9 @@ int main(int argc, char ** argv) {

    // context for work buffer
    struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL, // mem_buffer
-        false, // no_alloc
+        .mem_size= max_work_size, // mem_size
+        .mem_buffer = NULL, // mem_buffer
+        .no_alloc = false, // no_alloc
    };
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);

@@ -41,9 +41,9 @@ static bool gguf_ex_write(const std::string & fname) {
    gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);

    struct ggml_init_params params = {
-        /*.mem_size =*/ 128ull*1024ull*1024ull,
-        /*.mem_buffer =*/ NULL,
-        /*.no_alloc =*/ false,
+        .mem_size = 128ull*1024ull*1024ull,
+        .mem_buffer = NULL,
+        .no_alloc = false,
    };

    struct ggml_context * ctx_data = ggml_init(params);

@@ -87,8 +87,8 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ NULL,
+        .no_alloc = false,
+        .ctx = NULL,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

@@ -147,8 +147,8 @@ static bool gguf_ex_read_1(const std::string & fname) {
    struct ggml_context * ctx_data = NULL;

    struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ &ctx_data,
+        .no_alloc = false,
+        .ctx = &ctx_data,
    };

    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

@@ -256,9 +256,9 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
    const auto & buf_compute = ctx->buf_compute;

    struct ggml_init_params params = {
-        /*.mem_size =*/ buf_compute.size,
-        /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc =*/ false,
+        .mem_size = buf_compute.size,
+        .mem_buffer = buf_compute.data,
+        .no_alloc = false,
    };

    params.no_alloc = true;

@@ -456,8 +456,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    struct ggml_context * meta = NULL;

    struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx = */ &meta,
+        .no_alloc = true,
+        .ctx = &meta,
    };

    struct gguf_context * ctx = gguf_init_from_file(fname, params);

@@ -553,9 +553,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
    // load tensors
    {
        struct ggml_init_params params = {
-            /*.mem_size =*/ ctx_size,
-            /*.mem_buffer =*/ NULL,
-            /*.no_alloc =*/ false,
+            .mem_size = ctx_size,
+            .mem_buffer = NULL,
+            .no_alloc = false,
        };

        new_clip->ctx = ggml_init(params);

@@ -601,8 +601,8 @@ static void save_llama_model_gguf(struct gguf_context * fctx, const char * fn_vo
    // set vocab by copying from vocab_model gguf file
    {
        struct gguf_init_params params = {
-            /*.no_alloc = */ false,
-            /*.ctx = */ NULL,
+            .no_alloc = false,
+            .ctx = NULL,
        };
        struct gguf_context * vctx = gguf_init_from_file(fn_vocab_model, params);

@@ -1086,9 +1086,9 @@ int main(int argc, char ** argv) {

    // context for input tensors without their data
    struct ggml_init_params ctx_input_params = {
-        ggml_tensor_overhead() * 2, // mem_size
-        NULL, // mem_buffer
-        true, // no_alloc
+        .mem_size = ggml_tensor_overhead() * 2, // mem_size
+        .mem_buffer = NULL, // mem_buffer
+        .no_alloc = true, // no_alloc
    };
    struct ggml_context * ctx_input = ggml_init(ctx_input_params);

@@ -1115,9 +1115,9 @@ int main(int argc, char ** argv) {
        (params.common.use_checkpointing ? 3 : 2)*(GGML_OBJECT_SIZE+ggml_graph_overhead_custom(LLAMA_TRAIN_MAX_NODES, true))
    );
    struct ggml_init_params ctx_compute_params = {
-        estimated_compute_size_wo_data, // mem_size
-        NULL, // mem_buffer
-        true, // no_alloc
+        .mem_size = estimated_compute_size_wo_data, // mem_size
+        .mem_buffer= NULL, // mem_buffer
+        .no_alloc = true, // no_alloc
    };
    struct ggml_context * ctx_compute = NULL;

@@ -1268,9 +1268,9 @@ int main(int argc, char ** argv) {

    // context for work buffer
    struct ggml_init_params ctx_work_params = {
-        max_work_size, // mem_size
-        NULL, // mem_buffer
-        false, // no_alloc
+        .mem_size= max_work_size, //
+        .mem_buffer= NULL, //
+        .no_alloc=false, //
    };
    struct ggml_context * ctx_work = ggml_init(ctx_work_params);

@@ -351,15 +351,17 @@ struct ggml_gallocr {
 ggml_gallocr_t ggml_gallocr_new(void) {
     ggml_gallocr_t galloc = (ggml_gallocr_t)malloc(sizeof(struct ggml_gallocr));

+    ggml_hash_set hs = {.size=0, .keys=NULL};
     *galloc = (struct ggml_gallocr) {
-        /*.talloc = */ NULL,
-        /*.hash_set = */ {0},
-        /*.hash_values = */ NULL,
-        /*.hash_values_size = */ 0,
-        /*.hash_allocs = */ NULL,
-        /*.parse_seq = */ NULL,
-        /*.parse_seq_len = */ 0,
+        .talloc = NULL,
+        .hash_set =hs,
+        .hash_values = NULL,
+        .hash_values_size = 0,
+        .hash_allocs = NULL,
+        .parse_seq = NULL,
+        .parse_seq_len = 0,
     };
+    //((*galloc).hash_set)[0] = 0;

     return galloc;
 }

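A hedged aside on the hunk above: the commit routes the hash_set member through a named temporary hs. Under C++20 aggregate rules a nested designated initializer written in place would also be accepted; a small sketch with stand-in types (not the real ggml-alloc definitions):

#include <cstddef>

// stand-in types, named differently from ggml's to make the illustration self-contained
struct hash_set_t { size_t size; void ** keys; };
struct gallocr_t  { void * talloc; hash_set_t hash_set; void ** hash_values; };

int main() {
    hash_set_t hs = { .size = 0, .keys = nullptr };       // named temporary, as in the diff
    gallocr_t a = { .talloc = nullptr, .hash_set = hs };  // trailing members are value-initialized
    gallocr_t b = { .talloc = nullptr,
                    .hash_set = { .size = 0, .keys = nullptr } };  // nested designators, same effect
    (void)a; (void)b;
    return 0;
}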
@@ -706,8 +708,8 @@ struct ggml_allocr {
 static ggml_allocr_t ggml_allocr_new_impl(ggml_tallocr_t talloc) {
     ggml_allocr_t alloc = (ggml_allocr_t)malloc(sizeof(struct ggml_allocr));
     *alloc = (struct ggml_allocr) {
-        /*.talloc = */ talloc,
-        /*.galloc = */ ggml_gallocr_new(),
+        .talloc = talloc,
+        .galloc = ggml_gallocr_new(),
     };
     return alloc;
 }

@@ -587,9 +587,9 @@ static void sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * g
    sched->n_splits = 0;

    struct ggml_init_params params = {
-        /*.mem_size = */ sizeof(sched->context_buffer),
-        /*.mem_buffer = */ sched->context_buffer,
-        /*.no_alloc = */ true
+        .mem_size = sizeof(sched->context_buffer),
+        .mem_buffer = sched->context_buffer,
+        .no_alloc = true
    };

    if (sched->ctx != NULL) {

ggml.cpp
@@ -2,6 +2,8 @@
+//https://github.com/Neargye/magic_enum.git
+#include <magic_enum.hpp>



 #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #define _USE_MATH_DEFINES // For M_PI on MSVC

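For context on the new include: magic_enum is a header-only enum reflection library, e.g. magic_enum::enum_name returns an enumerator's name as a std::string_view. A hedged sketch with a stand-in enum (the diff only adds the include, not any call):

#include <iostream>
#include <magic_enum.hpp>

enum class task_type { COMPUTE, FINALIZE };   // stand-in for ggml_task_type

int main() {
    // prints "FINALIZE"
    std::cout << magic_enum::enum_name(task_type::FINALIZE) << "\n";
    return 0;
}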
@@ -16136,11 +16138,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        // all other threads are finished and spinning
        // do finalize and init here so we don't have synchronize again
        struct ggml_compute_params params = {
-            /*.type =*/ GGML_TASK_FINALIZE,
-            /*.ith =*/ 0,
-            /*.nth =*/ 0,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type = GGML_TASK_FINALIZE,
+            .ith = 0,
+            .nth = 0,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
        };

        if (node_n != -1) {

@@ -16219,11 +16221,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        const int n_tasks = ggml_get_n_tasks(node, n_threads);

        struct ggml_compute_params params = {
-            /*.type =*/ GGML_TASK_COMPUTE,
-            /*.ith =*/ state->ith,
-            /*.nth =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
+            .type = GGML_TASK_COMPUTE,
+            .ith = state->ith,
+            .nth = n_tasks,
+            .wsize = cplan->work_size,
+            .wdata = cplan->work_data,
        };

        if (state->ith < n_tasks) {

ggml.h
@@ -1,5 +1,6 @@
 #pragma once

+#include<refl-cpp/refl.hpp>
 //
 // GGML Tensor Library
 //

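For context on the refl-cpp include and the `: refl::attr::usage::type` bases added in the hunks below: refl-cpp is a header-only static reflection library for C++17. Its usual workflow registers a type with REFL_AUTO and then iterates the reflected members; a hedged sketch with a stand-in struct (registering the actual ggml structs is not part of this diff):

#include <cstddef>
#include <iostream>
#include <refl-cpp/refl.hpp>

struct my_params {          // stand-in, mirroring the shape of ggml_init_params
    size_t mem_size;
    void * mem_buffer;
    bool   no_alloc;
};

// register the type and its fields with refl-cpp
REFL_AUTO(type(my_params), field(mem_size), field(mem_buffer), field(no_alloc))

int main() {
    // print the name of every reflected field
    for_each(refl::reflect<my_params>().members, [](auto member) {
        std::cout << member.name << "\n";
    });
    return 0;
}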
@@ -465,7 +466,7 @@ extern "C" {
    };

    // ggml object
-    struct ggml_object {
+    struct ggml_object : refl::attr::usage::type {
        size_t offs;
        size_t size;

@@ -479,7 +480,7 @@ extern "C" {
    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);

    // n-dimensional tensor
-    struct ggml_tensor {
+    struct ggml_tensor : refl::attr::usage::type{
        enum ggml_type type;
        enum ggml_backend_type backend;

@@ -524,7 +525,7 @@ extern "C" {

    // the compute plan that needs to be prepared for ggml_graph_compute()
    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
+    struct ggml_cplan : refl::attr::usage::type{
        size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`

@@ -541,13 +542,13 @@ extern "C" {
        GGML_CGRAPH_EVAL_ORDER_COUNT
    };

-    struct ggml_hash_set {
+    struct ggml_hash_set : refl::attr::usage::type{
        size_t size;
        struct ggml_tensor ** keys;
    };

    // computation graph
-    struct ggml_cgraph {
+    struct ggml_cgraph : refl::attr::usage::type{
        int size;
        int n_nodes;
        int n_leafs;

@@ -567,7 +568,7 @@ extern "C" {
    };

    // scratch buffer
-    struct ggml_scratch {
+    struct ggml_scratch : refl::attr::usage::type{
        size_t offs;
        size_t size;
        void * data;

@@ -579,7 +580,7 @@ extern "C" {
        {}
    };

-    struct ggml_init_params {
+    struct ggml_init_params : refl::attr::usage::type{
        // memory pool
        size_t mem_size; // bytes
        void * mem_buffer; // if NULL, memory will be allocated internally

@@ -597,7 +598,7 @@ extern "C" {
        GGML_TASK_FINALIZE,
    };

-    struct ggml_compute_params {
+    struct ggml_compute_params : refl::attr::usage::type{
        enum ggml_task_type type;

        // ith = thread index, nth = number of threads

@@ -1835,7 +1836,7 @@ extern "C" {
    //
    // see ggml.c (ggml_opt_default_params) for default values
    //
-    struct ggml_opt_params {
+    struct ggml_opt_params : refl::attr::usage::type{
        enum ggml_opt_type type;

        size_t graph_size;

@@ -1865,7 +1866,7 @@ extern "C" {
        int n_gradient_accumulation;

        // ADAM parameters
-        struct {
+        struct ggml_adam: refl::attr::usage::type{
            int n_iter;

            float sched; // schedule multiplier (fixed, decay or warmup)

@@ -1881,7 +1882,7 @@ extern "C" {
        } adam;

        // LBFGS parameters
-        struct {
+        struct ggml_lbfgs: refl::attr::usage::type{
            int m; // number of corrections to approximate the inv. Hessian
            int n_iter;
            int max_linesearch;

@@ -1896,7 +1897,7 @@ extern "C" {
        } lbfgs;
    };

-    struct ggml_opt_context {
+    struct ggml_opt_context : refl::attr::usage::type{
        struct ggml_context * ctx;
        struct ggml_opt_params params;

@@ -1908,7 +1909,7 @@ extern "C" {
        float loss_before;
        float loss_after;

-        struct {
+        struct ggml_grad : refl::attr::usage::type{
            struct ggml_tensor * g; // current gradient
            struct ggml_tensor * m; // first moment
            struct ggml_tensor * v; // second moment

@@ -1918,7 +1919,7 @@ extern "C" {
            int n_no_improvement;
        } adam;

-        struct {
+        struct ggml_params : refl::attr::usage::type{
            struct ggml_tensor * x; // current parameters
            struct ggml_tensor * xp; // previous parameters
            struct ggml_tensor * g; // current gradient

@@ -2011,7 +2012,7 @@ extern "C" {

    struct gguf_context;

-    struct gguf_init_params {
+    struct gguf_init_params : refl::attr::usage::type{
        bool no_alloc;

        // if not NULL, create a ggml_context and allocate the tensor data in it

@@ -2148,7 +2149,7 @@ extern "C" {
    typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
    typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);

-    typedef struct {
+    typedef struct ggml_something : refl::attr::usage::type{
        const char * type_name;
        int blck_size;
        size_t type_size;

llama.cpp
@@ -1786,8 +1786,8 @@ struct llama_model_loader {

    llama_model_loader(const std::string & fname, bool use_mmap) : file(fname.c_str(), "rb") {
        struct gguf_init_params params = {
-            /*.no_alloc = */ true,
-            /*.ctx = */ &ctx_meta,
+            .no_alloc = true,
+            .ctx = &ctx_meta,
        };

        ctx_gguf = gguf_init_from_file(fname.c_str(), params);

@@ -2677,9 +2677,9 @@ static void llm_load_tensors(
    }

    struct ggml_init_params params = {
-        /*.mem_size =*/ model.buf.size,
-        /*.mem_buffer =*/ model.buf.data,
-        /*.no_alloc =*/ ml.use_mmap,
+        .mem_size = model.buf.size,
+        .mem_buffer = model.buf.data,
+        .no_alloc = ml.use_mmap,
    };

    model.ctx = ggml_init(params);

@@ -3843,9 +3843,9 @@ struct llm_build_context {

    void init() {
        struct ggml_init_params params = {
-            /*.mem_size =*/ buf_compute.size,
-            /*.mem_buffer =*/ buf_compute.data,
-            /*.no_alloc =*/ true,
+            .mem_size = buf_compute.size,
+            .mem_buffer = buf_compute.data,
+            .no_alloc = true,
        };

        ctx0 = ggml_init(params);

@@ -8427,7 +8427,10 @@ void llama_backend_init(bool numa) {

    // needed to initialize f16 tables
    {
-        struct ggml_init_params params = { 0, NULL, false };
+        struct ggml_init_params params = { .mem_size = 0,
+                                           .mem_buffer = NULL,
+                                           .no_alloc = false
+        };
        struct ggml_context * ctx = ggml_init(params);
        ggml_free(ctx);
    }

@@ -8998,7 +9001,13 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
    if (kv_buf_size) {
        const size_t elt_size = ggml_element_size(kv_self.k);

-        ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+        ggml_init_params ip = {
+            .mem_size = 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+            .mem_buffer =NULL,
+            .no_alloc = /* no_alloc */ true
+        };
+
+        ggml_context * cpy_ctx = ggml_init( ip);
        ggml_cgraph * gf = ggml_new_graph(cpy_ctx);

        ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);

@@ -9126,7 +9135,12 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {

    const size_t elt_size = ggml_element_size(kv_self.k);

-    ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
+    ggml_init_params ip {
+        .mem_size= 6*ggml_tensor_overhead() + ggml_graph_overhead(),
+        .mem_buffer=NULL,
+        .no_alloc=true };
+
+    ggml_context * cpy_ctx = ggml_init(ip);
    ggml_cgraph * gf = ggml_new_graph(cpy_ctx);

    ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);