llama : disable 405B max_nodes path due to lack of complaints

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-07-27 13:32:44 +03:00
parent 7e27c17572
commit 2d74714535
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@@ -3657,10 +3657,10 @@ namespace GGUFMeta {
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
// TODO: update when needed or think of some clever automatic way to do this
static size_t llama_model_max_nodes(const llama_model & model) {
if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > 400) { // llama-3 405B
return 32768;
}
// Upper bound on the number of nodes in a model's compute graph.
// The model parameter is currently unused: the per-architecture override
// below is disabled, so every model gets the same default limit.
static size_t llama_model_max_nodes(const llama_model & /*model*/) {
    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
    //    return 32768;
    //}

    // default graph-node budget applied to all models
    constexpr size_t default_max_nodes = 8192;
    return default_max_nodes;
}