llama : model-based max number of graph nodes calculation

Nico Bosshard 2024-08-10 15:14:49 +02:00
parent 7eb23840ed
commit ebeba4cf00


@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
 // TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
-
-    return 8192;
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max(8192, (int)model.tensors_by_name.size()*5);
 }
 
 struct llama_model_loader {
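
For context, the new rule sizes the graph node budget from the model's tensor count: at least 8192 nodes, otherwise roughly five nodes per tensor. A minimal standalone sketch of that heuristic follows; the factor 5 and the 8192 floor come from the diff above, while the sample tensor counts are purely illustrative and not tied to any specific model.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

// Sketch of the updated heuristic from the diff above:
// at least 8192 graph nodes, otherwise ~5 nodes per model tensor.
static size_t max_nodes_for(size_t n_tensors) {
    return std::max<size_t>(8192, n_tensors * 5);
}

int main() {
    // Illustrative tensor counts (hypothetical values).
    const size_t samples[] = { 291, 1147, 50000 };
    for (size_t n : samples) {
        std::printf("n_tensors = %6zu -> max graph nodes = %zu\n", n, max_nodes_for(n));
    }
    return 0;
}
```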