llama : model-based max number of graph nodes calculation

Nico Bosshard 2024-08-10 15:14:49 +02:00
parent 7eb23840ed
commit ebeba4cf00


@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
 // TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
-
-    return 8192;
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max(8192, (int)model.tensors_by_name.size()*5);
 }
 
 struct llama_model_loader {
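
For context, the new rule sizes the graph node budget from the model's tensor count: at least 8192 nodes, otherwise roughly five nodes per tensor. A minimal standalone sketch of that heuristic follows; the factor 5 and the 8192 floor come from the diff above, while the sample tensor counts are purely illustrative and not tied to any specific model.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

// Sketch of the updated heuristic from the diff above:
// at least 8192 graph nodes, otherwise ~5 nodes per model tensor.
static size_t max_nodes_for(size_t n_tensors) {
    return std::max<size_t>(8192, n_tensors * 5);
}

int main() {
    // Illustrative tensor counts (hypothetical values).
    const size_t samples[] = { 291, 1147, 50000 };
    for (size_t n : samples) {
        std::printf("n_tensors = %6zu -> max graph nodes = %zu\n", n, max_nodes_for(n));
    }
    return 0;
}
```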