llama : fix save/load state context size
ggml-ci
parent 83c96d5809
commit 8401e3ebcd

1 changed file with 2 additions and 2 deletions
@@ -8558,7 +8558,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         if (kv_buf_size) {
             const size_t elt_size = ggml_element_size(kv_self.k);
 
-            ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+            ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
             ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
             ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -8686,7 +8686,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
 
             const size_t elt_size = ggml_element_size(kv_self.k);
 
-            ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+            ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
             ggml_cgraph * gf = ggml_new_graph(cpy_ctx);
 
             ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
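
Context on the fix: with /* no_alloc */ true, a ggml context allocates no tensor data, only tensor and graph metadata, so its buffer can be sized exactly from ggml's overhead helpers instead of a guessed constant like 4096, which can overflow once the metadata grows. A minimal sketch of the same sizing pattern, in the style of the patched code (the helper name and the tensor-count parameter are illustrative, not part of the commit):

// Sketch: create a metadata-only ggml context with room for
// n_tensors tensor headers plus the bookkeeping of one compute graph.
static ggml_context * make_meta_ctx(size_t n_tensors) {
    return ggml_init({
        /*.mem_size   =*/ n_tensors*ggml_tensor_overhead() + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,   // metadata only; no tensor data is allocated here
    });
}

The factor of six presumably counts the tensors each copy graph creates: kout3d/vout3d (or kin3d/vin3d), two 3-D views of kv_self.k and kv_self.v, and the two ggml_cpy results; only the first of these is visible in the hunks above.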