llama_control_vector_load: let gguf_init_from_file allocate the ggml_context
This commit is contained in:
parent: 6e1fbf87b0
commit: 181879f942
1 changed file with 2 additions and 13 deletions
@@ -2650,12 +2650,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
     {
-        struct ggml_init_params meta_params = {
-            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
-            /* .mem_buffer = */ nullptr,
-            /* .no_alloc   = */ true,
-        };
-        ggml_context * meta_ctx = ggml_init(meta_params);
+        ggml_context * meta_ctx = nullptr;
         struct gguf_init_params meta_gguf_params = {
             /* .no_alloc = */ true,
             /* .ctx = */ &meta_ctx,
@@ -2720,13 +2715,7 @@ static llama_control_vector_data llama_control_vector_load_one(const llama_contr
     }

     // load and scale tensors into final control vector context
-    struct ggml_init_params ggml_params = {
-        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
-        /* .mem_buffer = */ nullptr,
-        /* .no_alloc   = */ false,
-    };
-    struct ggml_context * ctx = ggml_init(ggml_params);
-
+    struct ggml_context * ctx = nullptr;
     struct gguf_init_params params = {
         /*.no_alloc = */ false,
         /*.ctx = */ &ctx,
Loading…
Add table
Add a link
Reference in a new issue