cuda : print total VRAM used
parent 5659391b6a
commit 17e841ac22
1 changed file with 23 additions and 0 deletions
 llama.cpp | 23 +++++++++++++++++++++++
@@ -6773,6 +6773,29 @@ struct llama_context * llama_new_context_with_model(
 #ifdef GGML_USE_CUBLAS
         ggml_cuda_set_scratch_size(alloc_size);
         LLAMA_LOG_INFO("%s: VRAM scratch buffer: %.2f MB\n", __func__, alloc_size / 1024.0 / 1024.0);
+
+        // calculate total VRAM usage
+        auto add_tensor = [](const ggml_tensor * t, size_t & size) {
+            if (t->backend == GGML_BACKEND_GPU || t->backend == GGML_BACKEND_GPU_SPLIT) {
+                size += ggml_nbytes(t);
+            }
+        };
+        size_t model_vram_size = 0;
+        for (const auto & kv : model->tensors_by_name) {
+            add_tensor(kv.second, model_vram_size);
+        }
+
+        size_t kv_vram_size = 0;
+        add_tensor(ctx->kv_self.k, kv_vram_size);
+        add_tensor(ctx->kv_self.v, kv_vram_size);
+
+        size_t ctx_vram_size = alloc_size + kv_vram_size;
+        size_t total_vram_size = model_vram_size + ctx_vram_size;
+
+        LLAMA_LOG_INFO("%s: total VRAM used: %.2f MB (model: %.2f MB, context: %.2f MB)\n", __func__,
+                total_vram_size / 1024.0 / 1024.0,
+                model_vram_size / 1024.0 / 1024.0,
+                ctx_vram_size / 1024.0 / 1024.0);
 #endif
     }
 
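For reference, the accounting pattern added above, shown as a self-contained sketch. MockTensor, Backend, and the byte counts below are hypothetical stand-ins for ggml_tensor, its backend field, and ggml_nbytes(); they are not part of llama.cpp, and the sizes are illustrative only.

// Self-contained sketch of the VRAM accounting added in this commit.
// MockTensor stands in for ggml_tensor; nbytes stands in for ggml_nbytes().
#include <cstddef>
#include <cstdio>
#include <vector>

enum class Backend { CPU, GPU, GPU_SPLIT };

struct MockTensor {
    Backend backend;
    size_t  nbytes; // what ggml_nbytes() would report for the tensor
};

int main() {
    // hypothetical model weights and KV cache tensors (illustrative sizes)
    std::vector<MockTensor> model_tensors = {
        { Backend::GPU,       512u * 1024 * 1024 },
        { Backend::GPU_SPLIT, 256u * 1024 * 1024 },
        { Backend::CPU,        64u * 1024 * 1024 }, // CPU-resident: not counted
    };
    MockTensor kv_k = { Backend::GPU, 128u * 1024 * 1024 };
    MockTensor kv_v = { Backend::GPU, 128u * 1024 * 1024 };

    // same shape as the lambda in the commit: only GPU-resident tensors count
    auto add_tensor = [](const MockTensor & t, size_t & size) {
        if (t.backend == Backend::GPU || t.backend == Backend::GPU_SPLIT) {
            size += t.nbytes;
        }
    };

    size_t model_vram_size = 0;
    for (const auto & t : model_tensors) {
        add_tensor(t, model_vram_size);
    }

    size_t kv_vram_size = 0;
    add_tensor(kv_k, kv_vram_size);
    add_tensor(kv_v, kv_vram_size);

    printf("total VRAM used: %.2f MB (model: %.2f MB, kv: %.2f MB)\n",
           (model_vram_size + kv_vram_size) / 1024.0 / 1024.0,
           model_vram_size / 1024.0 / 1024.0,
           kv_vram_size / 1024.0 / 1024.0);
    return 0;
}

Compiling and running this prints a single summary line in the same MB units as the new LLAMA_LOG_INFO call.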