llama : fix vram size computation
parent fadcd583fc
commit a4da072d39

1 changed file with 2 additions and 2 deletions
@@ -975,7 +975,7 @@ static void llama_model_load_internal(
     size_t ctx_size;
     size_t mmapped_size;
     ml->calc_sizes(&ctx_size, &mmapped_size);
-    fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/1024.0/1024.0);
+    fprintf(stderr, "%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/1024.0/1024.0);

     // create the ggml context
     {
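Note: the first hunk only widens the minimum field width of the printed context size. A small standalone check (illustrative, not part of the commit) shows why: %6.2f overflows its field once the size needs four integer digits, while %7.2f keeps the columns aligned up to 9999.99 MB.

    #include <cstdio>

    int main() {
        fprintf(stderr, "%6.2f MB\n", 1024.50); // needs 7 chars: overflows a width of 6
        fprintf(stderr, "%7.2f MB\n", 1024.50); // fills the width of 7 exactly
        fprintf(stderr, "%7.2f MB\n",  512.00); // " 512.00 MB": left-padded, columns align
        return 0;
    }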
@@ -1050,7 +1050,7 @@ static void llama_model_load_internal(
             layer.w2 = ml->get_tensor(layers_i + ".feed_forward.w2.weight", { n_ff, n_embd}, backend);
             layer.w3 = ml->get_tensor(layers_i + ".feed_forward.w3.weight", {n_embd, n_ff}, backend);

-            if (backend == LLAMA_BACKEND_OFFLOAD) {
+            if (backend == GGML_BACKEND_CUDA) {
                 vram_total +=
                     ggml_nbytes(layer.attention_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                     ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.attention_norm) +
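Note: the second hunk changes which per-layer tensors are counted toward vram_total: only tensors placed on the CUDA backend actually occupy VRAM. A minimal sketch of this accounting pattern follows; the tensor_t and backend_t types are hypothetical stand-ins for ggml's structures, and nbytes plays the role of ggml_nbytes(), which returns a tensor's total byte size.

    #include <cstdio>
    #include <cstddef>
    #include <vector>

    // Hypothetical stand-ins for ggml's backend enum and tensor struct.
    enum backend_t { BACKEND_CPU, BACKEND_CUDA };

    struct tensor_t {
        size_t    nbytes;  // total byte size, analogous to ggml_nbytes()
        backend_t backend; // where the tensor's data lives
    };

    int main() {
        // Two hypothetical weight tensors: one offloaded to the GPU, one on the CPU.
        std::vector<tensor_t> tensors = {
            {4096u * 4096u * 2u, BACKEND_CUDA}, // e.g. an fp16 4096x4096 matrix
            {4096u * 4096u * 2u, BACKEND_CPU },
        };

        size_t vram_total = 0;
        for (const tensor_t & t : tensors) {
            if (t.backend == BACKEND_CUDA) {
                vram_total += t.nbytes; // count only tensors resident in VRAM
            }
        }

        fprintf(stderr, "vram total = %7.2f MB\n", vram_total/1024.0/1024.0);
        return 0;
    }

Summing per backend like this keeps the reported VRAM figure in line with what the CUDA backend actually allocates when some layers stay on the CPU.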