llama : fix llm_type enum names
ggml-ci
parent fffa6b15c4
commit a48412f92b
5 changed files with 381 additions and 382 deletions
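The change is mechanical: every MODEL_* enumerator of llm_type becomes LLM_TYPE_*, so the enum follows the project's usual naming convention, and the temporary "using e_model = llm_type;" alias is no longer needed. A minimal sketch of how the renamed enum and llm_type_name() are used after this commit (illustrative only, reduced to two values; the main() driver below is not part of the commit):

#include <cstdio>

// Reduced copy of the renamed enum and its printer, for illustration only.
enum llm_type {
    LLM_TYPE_UNKNOWN,
    LLM_TYPE_7B,
};

static const char * llm_type_name(llm_type type) {
    switch (type) {
        case LLM_TYPE_7B: return "7B";
        default:          return "?B";
    }
}

int main() {
    llm_type type = LLM_TYPE_7B;              // was MODEL_7B before the rename
    std::printf("%s\n", llm_type_name(type)); // prints "7B"
    return 0;
}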
@@ -35,7 +35,7 @@
// TODO: consider moving to llama-impl.h if needed in more places
#if defined(_WIN32)
-std::string llama_format_win_err(DWORD err) {
+static std::string llama_format_win_err(DWORD err) {
    LPSTR buf;
    size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
                                 NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
@@ -18,69 +18,69 @@
const char * llm_type_name(llm_type type) {
    switch (type) {
-       case MODEL_14M: return "14M";
-       case MODEL_17M: return "17M";
-       case MODEL_22M: return "22M";
-       case MODEL_33M: return "33M";
-       case MODEL_60M: return "60M";
-       case MODEL_70M: return "70M";
-       case MODEL_80M: return "80M";
-       case MODEL_109M: return "109M";
-       case MODEL_137M: return "137M";
-       case MODEL_160M: return "160M";
-       case MODEL_220M: return "220M";
-       case MODEL_250M: return "250M";
-       case MODEL_270M: return "270M";
-       case MODEL_335M: return "335M";
-       case MODEL_410M: return "410M";
-       case MODEL_450M: return "450M";
-       case MODEL_770M: return "770M";
-       case MODEL_780M: return "780M";
-       case MODEL_0_5B: return "0.5B";
-       case MODEL_1B: return "1B";
-       case MODEL_1_3B: return "1.3B";
-       case MODEL_1_4B: return "1.4B";
-       case MODEL_1_5B: return "1.5B";
-       case MODEL_1_6B: return "1.6B";
-       case MODEL_2B: return "2B";
-       case MODEL_2_8B: return "2.8B";
-       case MODEL_3B: return "3B";
-       case MODEL_4B: return "4B";
-       case MODEL_6B: return "6B";
-       case MODEL_6_9B: return "6.9B";
-       case MODEL_7B: return "7B";
-       case MODEL_8B: return "8B";
-       case MODEL_9B: return "9B";
-       case MODEL_11B: return "11B";
-       case MODEL_12B: return "12B";
-       case MODEL_13B: return "13B";
-       case MODEL_14B: return "14B";
-       case MODEL_15B: return "15B";
-       case MODEL_16B: return "16B";
-       case MODEL_20B: return "20B";
-       case MODEL_30B: return "30B";
-       case MODEL_32B: return "32B";
-       case MODEL_34B: return "34B";
-       case MODEL_35B: return "35B";
-       case MODEL_40B: return "40B";
-       case MODEL_65B: return "65B";
-       case MODEL_70B: return "70B";
-       case MODEL_236B: return "236B";
-       case MODEL_314B: return "314B";
-       case MODEL_671B: return "671B";
-       case MODEL_SMALL: return "0.1B";
-       case MODEL_MEDIUM: return "0.4B";
-       case MODEL_LARGE: return "0.8B";
-       case MODEL_XL: return "1.5B";
-       case MODEL_A1_7B: return "A1.7B";
-       case MODEL_A2_7B: return "A2.7B";
-       case MODEL_8x7B: return "8x7B";
-       case MODEL_8x22B: return "8x22B";
-       case MODEL_16x12B: return "16x12B";
-       case MODEL_16x3_8B: return "16x3.8B";
-       case MODEL_10B_128x3_66B: return "10B+128x3.66B";
-       case MODEL_57B_A14B: return "57B.A14B";
-       case MODEL_27B: return "27B";
+       case LLM_TYPE_14M: return "14M";
+       case LLM_TYPE_17M: return "17M";
+       case LLM_TYPE_22M: return "22M";
+       case LLM_TYPE_33M: return "33M";
+       case LLM_TYPE_60M: return "60M";
+       case LLM_TYPE_70M: return "70M";
+       case LLM_TYPE_80M: return "80M";
+       case LLM_TYPE_109M: return "109M";
+       case LLM_TYPE_137M: return "137M";
+       case LLM_TYPE_160M: return "160M";
+       case LLM_TYPE_220M: return "220M";
+       case LLM_TYPE_250M: return "250M";
+       case LLM_TYPE_270M: return "270M";
+       case LLM_TYPE_335M: return "335M";
+       case LLM_TYPE_410M: return "410M";
+       case LLM_TYPE_450M: return "450M";
+       case LLM_TYPE_770M: return "770M";
+       case LLM_TYPE_780M: return "780M";
+       case LLM_TYPE_0_5B: return "0.5B";
+       case LLM_TYPE_1B: return "1B";
+       case LLM_TYPE_1_3B: return "1.3B";
+       case LLM_TYPE_1_4B: return "1.4B";
+       case LLM_TYPE_1_5B: return "1.5B";
+       case LLM_TYPE_1_6B: return "1.6B";
+       case LLM_TYPE_2B: return "2B";
+       case LLM_TYPE_2_8B: return "2.8B";
+       case LLM_TYPE_3B: return "3B";
+       case LLM_TYPE_4B: return "4B";
+       case LLM_TYPE_6B: return "6B";
+       case LLM_TYPE_6_9B: return "6.9B";
+       case LLM_TYPE_7B: return "7B";
+       case LLM_TYPE_8B: return "8B";
+       case LLM_TYPE_9B: return "9B";
+       case LLM_TYPE_11B: return "11B";
+       case LLM_TYPE_12B: return "12B";
+       case LLM_TYPE_13B: return "13B";
+       case LLM_TYPE_14B: return "14B";
+       case LLM_TYPE_15B: return "15B";
+       case LLM_TYPE_16B: return "16B";
+       case LLM_TYPE_20B: return "20B";
+       case LLM_TYPE_30B: return "30B";
+       case LLM_TYPE_32B: return "32B";
+       case LLM_TYPE_34B: return "34B";
+       case LLM_TYPE_35B: return "35B";
+       case LLM_TYPE_40B: return "40B";
+       case LLM_TYPE_65B: return "65B";
+       case LLM_TYPE_70B: return "70B";
+       case LLM_TYPE_236B: return "236B";
+       case LLM_TYPE_314B: return "314B";
+       case LLM_TYPE_671B: return "671B";
+       case LLM_TYPE_SMALL: return "0.1B";
+       case LLM_TYPE_MEDIUM: return "0.4B";
+       case LLM_TYPE_LARGE: return "0.8B";
+       case LLM_TYPE_XL: return "1.5B";
+       case LLM_TYPE_A1_7B: return "A1.7B";
+       case LLM_TYPE_A2_7B: return "A2.7B";
+       case LLM_TYPE_8x7B: return "8x7B";
+       case LLM_TYPE_8x22B: return "8x22B";
+       case LLM_TYPE_16x12B: return "16x12B";
+       case LLM_TYPE_16x3_8B: return "16x3.8B";
+       case LLM_TYPE_10B_128x3_66B: return "10B+128x3.66B";
+       case LLM_TYPE_57B_A14B: return "57B.A14B";
+       case LLM_TYPE_27B: return "27B";
        default: return "?B";
    }
}
@@ -336,6 +336,9 @@ static buft_list_t make_gpu_buft_list(ggml_backend_dev_t dev, enum llama_split_m

struct llama_model::impl {
    impl() {}
    ~impl() {}

    uint64_t n_elements = 0;

    size_t n_bytes = 0;

@@ -366,7 +369,6 @@ struct llama_model::impl {
    layer_dev dev_input = {};
    layer_dev dev_output = {};
    std::vector<layer_dev> dev_layer;

};

llama_model::llama_model(const struct llama_model_params & params) : params(params), pimpl(std::make_unique<impl>()) {

@@ -501,8 +503,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
        hparams.n_embd_head_v = 0;
    }

-   using e_model = llm_type; // TMP
-
    // arch-specific KVs
    switch (arch) {
        case LLM_ARCH_LLAMA:
@@ -511,24 +511,24 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                if (hparams.n_expert == 8) {
                    switch (hparams.n_layer) {
-                       case 32: type = e_model::MODEL_8x7B; break;
-                       case 56: type = e_model::MODEL_8x22B; break;
-                       default: type = e_model::MODEL_UNKNOWN;
+                       case 32: type = LLM_TYPE_8x7B; break;
+                       case 56: type = LLM_TYPE_8x22B; break;
+                       default: type = LLM_TYPE_UNKNOWN;
                    }
                } else {
                    switch (hparams.n_layer) {
-                       case 16: type = e_model::MODEL_1B; break; // Llama 3.2 1B
-                       case 22: type = e_model::MODEL_1B; break;
-                       case 26: type = e_model::MODEL_3B; break;
-                       case 28: type = e_model::MODEL_3B; break; // Llama 3.2 3B
+                       case 16: type = LLM_TYPE_1B; break; // Llama 3.2 1B
+                       case 22: type = LLM_TYPE_1B; break;
+                       case 26: type = LLM_TYPE_3B; break;
+                       case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B
                        // granite uses a vocab with len 49152
-                       case 32: type = hparams.n_vocab == 49152 ? e_model::MODEL_3B : (hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B); break;
-                       case 36: type = e_model::MODEL_8B; break; // granite
-                       case 40: type = e_model::MODEL_13B; break;
-                       case 48: type = e_model::MODEL_34B; break;
-                       case 60: type = e_model::MODEL_30B; break;
-                       case 80: type = hparams.n_head() == hparams.n_head_kv() ? e_model::MODEL_65B : e_model::MODEL_70B; break;
-                       default: type = e_model::MODEL_UNKNOWN;
+                       case 32: type = hparams.n_vocab == 49152 ? LLM_TYPE_3B : (hparams.n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break;
+                       case 36: type = LLM_TYPE_8B; break; // granite
+                       case 40: type = LLM_TYPE_13B; break;
+                       case 48: type = LLM_TYPE_34B; break;
+                       case 60: type = LLM_TYPE_30B; break;
+                       case 80: type = hparams.n_head() == hparams.n_head_kv() ? LLM_TYPE_65B : LLM_TYPE_70B; break;
+                       default: type = LLM_TYPE_UNKNOWN;
                    }
                }
            } break;

@@ -536,9 +536,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 80: type = e_model::MODEL_70B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 80: type = LLM_TYPE_70B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_MINICPM:

@@ -549,9 +549,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);

                switch (hparams.n_layer) {
-                   case 52: type = e_model::MODEL_1B; break;
-                   case 40: type = e_model::MODEL_2B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 52: type = LLM_TYPE_1B; break;
+                   case 40: type = LLM_TYPE_2B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_MINICPM3:

@@ -561,8 +561,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);

                switch (hparams.n_layer) {
-                   case 62: type = e_model::MODEL_4B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 62: type = LLM_TYPE_4B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_GROK:

@@ -570,8 +570,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 64: type = e_model::MODEL_314B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 64: type = LLM_TYPE_314B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_FALCON:

@@ -579,21 +579,21 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);

                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 60: type = e_model::MODEL_40B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 60: type = LLM_TYPE_40B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_BAICHUAN:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_13B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_13B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }

-               if (type == e_model::MODEL_13B) {
+               if (type == LLM_TYPE_13B) {
                    // TODO: become GGUF KV parameter
                    hparams.f_max_alibi_bias = 8.0f;
                }
@@ -602,19 +602,19 @@ void llama_model::load_hparams(llama_model_loader & ml) {
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_1B; break;
-                   case 36: type = e_model::MODEL_3B; break;
-                   case 42: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_15B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = LLM_TYPE_1B; break;
+                   case 36: type = LLM_TYPE_3B; break;
+                   case 42: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_15B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_REFACT:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_1B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_1B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }

                // TODO: become GGUF KV parameter

@@ -629,18 +629,18 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                switch (hparams.n_layer) {
                    case 3:
-                       type = e_model::MODEL_17M; break; // bge-micro
+                       type = LLM_TYPE_17M; break; // bge-micro
                    case 6:
-                       type = e_model::MODEL_22M; break; // MiniLM-L6
+                       type = LLM_TYPE_22M; break; // MiniLM-L6
                    case 12:
                        switch (hparams.n_embd) {
-                           case 384: type = e_model::MODEL_33M; break; // MiniLM-L12, bge-small
-                           case 768: type = e_model::MODEL_109M; break; // bge-base
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 384: type = LLM_TYPE_33M; break; // MiniLM-L12, bge-small
+                           case 768: type = LLM_TYPE_109M; break; // bge-base
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 24:
-                       type = e_model::MODEL_335M; break; // bge-large
-                   default: type = e_model::MODEL_UNKNOWN;
+                       type = LLM_TYPE_335M; break; // bge-large
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_JINA_BERT_V2:

@@ -652,9 +652,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                hparams.f_max_alibi_bias = 8.0f;

                switch (hparams.n_layer) {
-                   case 4: type = e_model::MODEL_33M; break; // jina-embeddings-small
-                   case 12: type = e_model::MODEL_137M; break; // jina-embeddings-base
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 4: type = LLM_TYPE_33M; break; // jina-embeddings-small
+                   case 12: type = LLM_TYPE_137M; break; // jina-embeddings-base
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_NOMIC_BERT:

@@ -665,7 +665,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);

                if (hparams.n_layer == 12 && hparams.n_embd == 768) {
-                   type = e_model::MODEL_137M;
+                   type = LLM_TYPE_137M;
                }
            } break;
        case LLM_ARCH_BLOOM:

@@ -673,14 +673,14 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);

                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_1B; break;
+                   case 24: type = LLM_TYPE_1B; break;
                    case 30:
                        switch (hparams.n_embd) {
-                           case 2560: type = e_model::MODEL_3B; break;
-                           case 4096: type = e_model::MODEL_7B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 2560: type = LLM_TYPE_3B; break;
+                           case 4096: type = LLM_TYPE_7B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   default: type = LLM_TYPE_UNKNOWN;
                }

                // TODO: become GGUF KV parameter

@@ -693,9 +693,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);

                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 48: type = e_model::MODEL_30B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 48: type = LLM_TYPE_30B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_STABLELM:

@@ -703,10 +703,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);

                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_1B; break;
-                   case 32: type = e_model::MODEL_3B; break;
-                   case 40: type = e_model::MODEL_12B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = LLM_TYPE_1B; break;
+                   case 32: type = LLM_TYPE_3B; break;
+                   case 40: type = LLM_TYPE_12B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_QWEN:

@@ -714,9 +714,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_13B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_13B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_QWEN2VL:
@@ -728,15 +728,15 @@ void llama_model::load_hparams(llama_model_loader & ml) {
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 24: type = hparams.n_embd == 1024 ? e_model::MODEL_0_5B : e_model::MODEL_1B; break;
-                   case 28: type = hparams.n_embd == 1536 ? e_model::MODEL_1_5B : e_model::MODEL_7B; break;
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 36: type = e_model::MODEL_3B; break;
-                   case 40: type = hparams.n_head() == 20 ? e_model::MODEL_4B : e_model::MODEL_13B; break;
-                   case 48: type = e_model::MODEL_14B; break;
-                   case 64: type = e_model::MODEL_32B; break;
-                   case 80: type = e_model::MODEL_70B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = hparams.n_embd == 1024 ? LLM_TYPE_0_5B : LLM_TYPE_1B; break;
+                   case 28: type = hparams.n_embd == 1536 ? LLM_TYPE_1_5B : LLM_TYPE_7B; break;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 36: type = LLM_TYPE_3B; break;
+                   case 40: type = hparams.n_head() == 20 ? LLM_TYPE_4B : LLM_TYPE_13B; break;
+                   case 48: type = LLM_TYPE_14B; break;
+                   case 64: type = LLM_TYPE_32B; break;
+                   case 80: type = LLM_TYPE_70B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_QWEN2MOE:

@@ -746,9 +746,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_A2_7B; break;
-                   case 28: type = e_model::MODEL_57B_A14B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = LLM_TYPE_A2_7B; break;
+                   case 28: type = LLM_TYPE_57B_A14B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_PHI2:

@@ -756,9 +756,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);

                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_1B; break;
-                   case 32: type = e_model::MODEL_3B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = LLM_TYPE_1B; break;
+                   case 32: type = LLM_TYPE_3B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_PHI3:

@@ -766,10 +766,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 24: type = e_model::MODEL_1B; break;
-                   case 32: type = e_model::MODEL_3B; break;
-                   case 40: type = e_model::MODEL_14B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 24: type = LLM_TYPE_1B; break;
+                   case 32: type = LLM_TYPE_3B; break;
+                   case 40: type = LLM_TYPE_14B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }

                // for backward compatibility ; see: https://github.com/ggerganov/llama.cpp/pull/8931

@@ -802,27 +802,27 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 40: type = e_model::MODEL_13B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 40: type = LLM_TYPE_13B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_GPT2:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 12: type = e_model::MODEL_SMALL; break;
-                   case 24: type = e_model::MODEL_MEDIUM; break;
-                   case 36: type = e_model::MODEL_LARGE; break;
-                   case 48: type = e_model::MODEL_XL; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 12: type = LLM_TYPE_SMALL; break;
+                   case 24: type = LLM_TYPE_MEDIUM; break;
+                   case 36: type = LLM_TYPE_LARGE; break;
+                   case 48: type = LLM_TYPE_XL; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_CODESHELL:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 42: type = e_model::MODEL_7B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 42: type = LLM_TYPE_7B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_ORION:

@@ -830,17 +830,17 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);

                switch (hparams.n_layer) {
-                   case 40: type = e_model::MODEL_14B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 40: type = LLM_TYPE_14B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_INTERNLM2:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 48: type = e_model::MODEL_20B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 48: type = LLM_TYPE_20B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_GEMMA:

@@ -848,9 +848,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 18: type = e_model::MODEL_2B; break;
-                   case 28: type = e_model::MODEL_7B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 18: type = LLM_TYPE_2B; break;
+                   case 28: type = LLM_TYPE_7B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_GEMMA2:
@@ -863,22 +863,22 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                hparams.attn_soft_cap = true;

                switch (hparams.n_layer) {
-                   case 26: type = e_model::MODEL_2B; break;
-                   case 42: type = e_model::MODEL_9B; break;
-                   case 46: type = e_model::MODEL_27B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 26: type = LLM_TYPE_2B; break;
+                   case 42: type = LLM_TYPE_9B; break;
+                   case 46: type = LLM_TYPE_27B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_STARCODER2:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 30: type = e_model::MODEL_3B; break;
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_15B; break;
-                   case 52: type = e_model::MODEL_20B; break; // granite
-                   case 88: type = e_model::MODEL_34B; break; // granite
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 30: type = LLM_TYPE_3B; break;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_15B; break;
+                   case 52: type = LLM_TYPE_20B; break; // granite
+                   case 88: type = LLM_TYPE_34B; break; // granite
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_MAMBA:

@@ -894,32 +894,32 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                switch (hparams.n_layer) {
                    case 24:
                        switch (hparams.n_embd) {
-                           case 768: type = e_model::MODEL_SMALL; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 768: type = LLM_TYPE_SMALL; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 48:
                        switch (hparams.n_embd) {
-                           case 1024: type = e_model::MODEL_MEDIUM; break;
-                           case 1536: type = e_model::MODEL_LARGE; break;
-                           case 2048: type = e_model::MODEL_XL; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 1024: type = LLM_TYPE_MEDIUM; break;
+                           case 1536: type = LLM_TYPE_LARGE; break;
+                           case 2048: type = LLM_TYPE_XL; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 64:
                        switch (hparams.n_embd) {
-                           case 2560: type = e_model::MODEL_3B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 2560: type = LLM_TYPE_3B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_XVERSE:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_13B; break;
-                   case 80: type = e_model::MODEL_65B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_13B; break;
+                   case 80: type = LLM_TYPE_65B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_COMMAND_R:

@@ -927,8 +927,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 40: type = e_model::MODEL_35B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 40: type = LLM_TYPE_35B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_COHERE2:

@@ -937,8 +937,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                switch (hparams.n_layer) {
-                   case 32: type = e_model::MODEL_8B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 32: type = LLM_TYPE_8B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_DBRX:

@@ -947,8 +947,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv);

                switch (hparams.n_layer) {
-                   case 40: type = e_model::MODEL_16x12B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 40: type = LLM_TYPE_16x12B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_OLMO:

@@ -957,10 +957,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv, false);

                switch (hparams.n_layer) {
-                   case 22: type = e_model::MODEL_1B; break;
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 80: type = e_model::MODEL_70B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 22: type = LLM_TYPE_1B; break;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 80: type = LLM_TYPE_70B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_OLMO2:

@@ -968,18 +968,18 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 16: type = e_model::MODEL_1B; break;
-                   case 32: type = e_model::MODEL_7B; break;
-                   case 40: type = e_model::MODEL_13B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 16: type = LLM_TYPE_1B; break;
+                   case 32: type = LLM_TYPE_7B; break;
+                   case 40: type = LLM_TYPE_13B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_OLMOE:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 16: type = e_model::MODEL_A1_7B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 16: type = LLM_TYPE_A1_7B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_OPENELM:

@@ -987,11 +987,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 16: type = e_model::MODEL_270M; break;
-                   case 20: type = e_model::MODEL_450M; break;
-                   case 28: type = e_model::MODEL_1B; break;
-                   case 36: type = e_model::MODEL_3B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 16: type = LLM_TYPE_270M; break;
+                   case 20: type = LLM_TYPE_450M; break;
+                   case 28: type = LLM_TYPE_1B; break;
+                   case 36: type = LLM_TYPE_3B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_GPTNEOX:
@@ -1001,43 +1001,43 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                switch (hparams.n_layer) {
                    case 6:
                        switch (hparams.n_ff()) {
-                           case 512: type = e_model::MODEL_14M; break;
-                           case 2048: type = e_model::MODEL_70M; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 512: type = LLM_TYPE_14M; break;
+                           case 2048: type = LLM_TYPE_70M; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 12:
                        switch (hparams.n_ff()) {
-                           case 3072: type = e_model::MODEL_160M; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 3072: type = LLM_TYPE_160M; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 16:
                        switch (hparams.n_ff()) {
-                           case 8192: type = e_model::MODEL_1B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 8192: type = LLM_TYPE_1B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 24:
                        switch (hparams.n_ff()) {
-                           case 4096: type = e_model::MODEL_410M; break;
-                           case 8192: type = e_model::MODEL_1_4B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 4096: type = LLM_TYPE_410M; break;
+                           case 8192: type = LLM_TYPE_1_4B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 32:
                        switch (hparams.n_ff()) {
-                           case 10240: type = e_model::MODEL_2_8B; break;
-                           case 16384: type = e_model::MODEL_6_9B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 10240: type = LLM_TYPE_2_8B; break;
+                           case 16384: type = LLM_TYPE_6_9B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 36:
                        switch (hparams.n_ff()) {
-                           case 20480: type = e_model::MODEL_12B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 20480: type = LLM_TYPE_12B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 44:
                        switch (hparams.n_ff()) {
-                           case 24576: type = e_model::MODEL_20B; break;
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 24576: type = LLM_TYPE_20B; break;
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_ARCTIC:

@@ -1046,11 +1046,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                if (hparams.n_expert == 128) {
                    switch (hparams.n_layer) {
-                       case 35: type = e_model::MODEL_10B_128x3_66B; break;
-                       default: type = e_model::MODEL_UNKNOWN;
+                       case 35: type = LLM_TYPE_10B_128x3_66B; break;
+                       default: type = LLM_TYPE_UNKNOWN;
                    }
                } else {
-                   type = e_model::MODEL_UNKNOWN;
+                   type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_DEEPSEEK:

@@ -1062,8 +1062,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);

                switch (hparams.n_layer) {
-                   case 28: type = e_model::MODEL_20B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 28: type = LLM_TYPE_20B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_DEEPSEEK2:

@@ -1088,19 +1088,19 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);

                switch (hparams.n_layer) {
-                   case 27: type = e_model::MODEL_16B; break;
-                   case 60: type = e_model::MODEL_236B; break;
-                   case 61: type = e_model::MODEL_671B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 27: type = LLM_TYPE_16B; break;
+                   case 60: type = LLM_TYPE_236B; break;
+                   case 61: type = LLM_TYPE_671B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_CHATGLM:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
-                   case 28: type = e_model::MODEL_6B; break;
-                   case 40: type = e_model::MODEL_9B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 28: type = LLM_TYPE_6B; break;
+                   case 40: type = LLM_TYPE_9B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_BITNET:

@@ -1108,8 +1108,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
-                   case 26: type = e_model::MODEL_3B; break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   case 26: type = LLM_TYPE_3B; break;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_T5:

@@ -1123,32 +1123,32 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                }

                switch (hparams.n_layer) {
-                   case 6: type = e_model::MODEL_60M; break; // t5-small
-                   case 8: type = e_model::MODEL_80M; break; // flan-t5-small
+                   case 6: type = LLM_TYPE_60M; break; // t5-small
+                   case 8: type = LLM_TYPE_80M; break; // flan-t5-small
                    case 12:
                        switch (hparams.n_ff()) {
-                           case 3072: type = e_model::MODEL_220M; break; // t5-base
-                           case 2048: type = e_model::MODEL_250M; break; // flan-t5-base
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 3072: type = LLM_TYPE_220M; break; // t5-base
+                           case 2048: type = LLM_TYPE_250M; break; // flan-t5-base
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
                    case 24:
                        switch (hparams.n_ff()) {
-                           case 4096: type = e_model::MODEL_770M; break; // t5-large
-                           case 2816: type = e_model::MODEL_780M; break; // flan-t5-large
-                           case 16384: type = e_model::MODEL_3B; break; // t5-3b
-                           case 5120: type = e_model::MODEL_3B; break; // flan-t5-xl
-                           case 65536: type = e_model::MODEL_11B; break; // t5-11b
-                           case 10240: type = e_model::MODEL_11B; break; // flan-t5-xxl
-                           default: type = e_model::MODEL_UNKNOWN;
+                           case 4096: type = LLM_TYPE_770M; break; // t5-large
+                           case 2816: type = LLM_TYPE_780M; break; // flan-t5-large
+                           case 16384: type = LLM_TYPE_3B; break; // t5-3b
+                           case 5120: type = LLM_TYPE_3B; break; // flan-t5-xl
+                           case 65536: type = LLM_TYPE_11B; break; // t5-11b
+                           case 10240: type = LLM_TYPE_11B; break; // flan-t5-xxl
+                           default: type = LLM_TYPE_UNKNOWN;
                        } break;
-                   default: type = e_model::MODEL_UNKNOWN;
+                   default: type = LLM_TYPE_UNKNOWN;
                }
            } break;
        case LLM_ARCH_T5ENCODER:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                ml.get_key(LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, hparams.n_rel_attn_bkts);
-               type = e_model::MODEL_UNKNOWN;
+               type = LLM_TYPE_UNKNOWN;
            } break;
        case LLM_ARCH_JAIS:
            {
|
@ -1156,18 +1156,18 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_ATTENTION_MAX_ALIBI_BIAS, hparams.f_max_alibi_bias);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 24: type = e_model::MODEL_1_3B; break;
|
||||
case 40: type = e_model::MODEL_13B; break;
|
||||
case 24: type = LLM_TYPE_1_3B; break;
|
||||
case 40: type = LLM_TYPE_13B; break;
|
||||
/* TODO: add variants */
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_NEMOTRON:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||
switch (hparams.n_layer) {
|
||||
case 32: type = e_model::MODEL_4B; break;
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
case 32: type = LLM_TYPE_4B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_EXAONE:
|
||||
|
@ -1175,8 +1175,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 32: type = e_model::MODEL_8B; break;
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
case 32: type = LLM_TYPE_8B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_RWKV6:
|
||||
|
@ -1188,15 +1188,15 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_RESCALE_EVERY_N_LAYERS, hparams.rescale_every_n_layers, false);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 24: type = e_model::MODEL_1_6B; break;
|
||||
case 24: type = LLM_TYPE_1_6B; break;
|
||||
case 32:
|
||||
switch (hparams.n_embd) {
|
||||
case 2560: type = e_model::MODEL_3B; break;
|
||||
case 4096: type = e_model::MODEL_7B; break;
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
case 2560: type = LLM_TYPE_3B; break;
|
||||
case 4096: type = LLM_TYPE_7B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
} break;
|
||||
case 61: type = e_model::MODEL_14B; break;
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
case 61: type = LLM_TYPE_14B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_GRANITE:
|
||||
|
@ -1209,10 +1209,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 32: type = e_model::MODEL_3B; break;
|
||||
case 40: type = e_model::MODEL_3B; break;
|
||||
case 32: type = LLM_TYPE_3B; break;
|
||||
case 40: type = LLM_TYPE_3B; break;
|
||||
// Add additional layer/vocab/etc checks here for other model sizes
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_CHAMELEON:
|
||||
|
@ -1222,9 +1222,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
ml.get_key(LLM_KV_SWIN_NORM, hparams.swin_norm);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 32: type = e_model::MODEL_7B; break;
|
||||
case 48: type = e_model::MODEL_34B; break;
|
||||
default: type = e_model::MODEL_UNKNOWN;
|
||||
case 32: type = LLM_TYPE_7B; break;
|
||||
case 48: type = LLM_TYPE_34B; break;
|
||||
default: type = LLM_TYPE_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_WAVTOKENIZER_DEC:
|
||||
|
|
|
@@ -13,72 +13,71 @@
struct llama_model_loader;

// available models
-// TODO: this enum does not follow the enum naming convention
enum llm_type {
-   MODEL_UNKNOWN,
-   MODEL_14M,
-   MODEL_17M,
-   MODEL_22M,
-   MODEL_33M,
-   MODEL_60M,
-   MODEL_70M,
-   MODEL_80M,
-   MODEL_109M,
-   MODEL_137M,
-   MODEL_160M,
-   MODEL_220M,
-   MODEL_250M,
-   MODEL_270M,
-   MODEL_335M,
-   MODEL_410M,
-   MODEL_450M,
-   MODEL_770M,
-   MODEL_780M,
-   MODEL_0_5B,
-   MODEL_1B,
-   MODEL_1_3B,
-   MODEL_1_4B,
-   MODEL_1_5B,
-   MODEL_1_6B,
-   MODEL_2B,
-   MODEL_2_8B,
-   MODEL_3B,
-   MODEL_4B,
-   MODEL_6B,
-   MODEL_6_9B,
-   MODEL_7B,
-   MODEL_8B,
-   MODEL_9B,
-   MODEL_11B,
-   MODEL_12B,
-   MODEL_13B,
-   MODEL_14B,
-   MODEL_15B,
-   MODEL_16B,
-   MODEL_20B,
-   MODEL_30B,
-   MODEL_32B,
-   MODEL_34B,
-   MODEL_35B,
-   MODEL_40B,
-   MODEL_65B,
-   MODEL_70B,
-   MODEL_236B,
-   MODEL_314B,
-   MODEL_671B,
-   MODEL_SMALL,
-   MODEL_MEDIUM,
-   MODEL_LARGE,
-   MODEL_XL,
-   MODEL_A1_7B,
-   MODEL_A2_7B,
-   MODEL_8x7B,
-   MODEL_8x22B,
-   MODEL_16x12B,
-   MODEL_16x3_8B,
-   MODEL_10B_128x3_66B,
-   MODEL_57B_A14B,
-   MODEL_27B,
+   LLM_TYPE_UNKNOWN,
+   LLM_TYPE_14M,
+   LLM_TYPE_17M,
+   LLM_TYPE_22M,
+   LLM_TYPE_33M,
+   LLM_TYPE_60M,
+   LLM_TYPE_70M,
+   LLM_TYPE_80M,
+   LLM_TYPE_109M,
+   LLM_TYPE_137M,
+   LLM_TYPE_160M,
+   LLM_TYPE_220M,
+   LLM_TYPE_250M,
+   LLM_TYPE_270M,
+   LLM_TYPE_335M,
+   LLM_TYPE_410M,
+   LLM_TYPE_450M,
+   LLM_TYPE_770M,
+   LLM_TYPE_780M,
+   LLM_TYPE_0_5B,
+   LLM_TYPE_1B,
+   LLM_TYPE_1_3B,
+   LLM_TYPE_1_4B,
+   LLM_TYPE_1_5B,
+   LLM_TYPE_1_6B,
+   LLM_TYPE_2B,
+   LLM_TYPE_2_8B,
+   LLM_TYPE_3B,
+   LLM_TYPE_4B,
+   LLM_TYPE_6B,
+   LLM_TYPE_6_9B,
+   LLM_TYPE_7B,
+   LLM_TYPE_8B,
+   LLM_TYPE_9B,
+   LLM_TYPE_11B,
+   LLM_TYPE_12B,
+   LLM_TYPE_13B,
+   LLM_TYPE_14B,
+   LLM_TYPE_15B,
+   LLM_TYPE_16B,
+   LLM_TYPE_20B,
+   LLM_TYPE_30B,
+   LLM_TYPE_32B,
+   LLM_TYPE_34B,
+   LLM_TYPE_35B,
+   LLM_TYPE_40B,
+   LLM_TYPE_65B,
+   LLM_TYPE_70B,
+   LLM_TYPE_236B,
+   LLM_TYPE_314B,
+   LLM_TYPE_671B,
+   LLM_TYPE_SMALL,
+   LLM_TYPE_MEDIUM,
+   LLM_TYPE_LARGE,
+   LLM_TYPE_XL,
+   LLM_TYPE_A1_7B,
+   LLM_TYPE_A2_7B,
+   LLM_TYPE_8x7B,
+   LLM_TYPE_8x22B,
+   LLM_TYPE_16x12B,
+   LLM_TYPE_16x3_8B,
+   LLM_TYPE_10B_128x3_66B,
+   LLM_TYPE_57B_A14B,
+   LLM_TYPE_27B,
};

struct llama_layer_posnet {
@@ -284,7 +283,7 @@ struct llama_layer {
};

struct llama_model {
-   llm_type type = MODEL_UNKNOWN;
+   llm_type type = LLM_TYPE_UNKNOWN;
    llm_arch arch = LLM_ARCH_UNKNOWN;

    std::string name = "n/a";

@@ -235,7 +235,7 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
        else if ((ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q5_K_M) &&
                use_more_bits(qs.i_attention_wv, qs.n_attention_wv)) new_type = GGML_TYPE_Q6_K;
        else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S && qs.i_attention_wv < 4) new_type = GGML_TYPE_Q5_K;
-       if (qs.model.type == MODEL_70B) {
+       if (qs.model.type == LLM_TYPE_70B) {
            // In the 70B model we have 8 heads sharing the same attn_v weights. As a result, the attn_v.weight tensor is
            // 8x smaller compared to attn_q.weight. Hence, we can get a nice boost in quantization accuracy with
            // nearly negligible increase in model size by quantizing this tensor with more bits:
@@ -1809,7 +1809,7 @@ struct llm_build_context {
        inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb);

        // inp_pos - contains the positions
-       struct ggml_tensor * inp_pos = model.type == MODEL_7B ? build_inp_pos() : nullptr;
+       struct ggml_tensor * inp_pos = model.type == LLM_TYPE_7B ? build_inp_pos() : nullptr;

        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
        struct ggml_tensor * KQ_mask = build_inp_KQ_mask();

@@ -1834,7 +1834,7 @@ struct llm_build_context {
        cb(Vcur, "Vcur", il);

        switch (model.type) {
-           case MODEL_7B:
+           case LLM_TYPE_7B:
                Qcur = ggml_rope_ext(
                    ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
                    n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,

@@ -1846,7 +1846,7 @@ struct llm_build_context {
                    ext_factor, attn_factor, beta_fast, beta_slow
                );
                break;
-           case MODEL_13B:
+           case LLM_TYPE_13B:
                Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd/n_head, n_head, n_tokens);
                Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd/n_head, n_head, n_tokens);
                break;

@@ -4889,9 +4889,9 @@ struct llm_build_context {
        // ref: https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
        switch (model.type) {
-           case llm_type::MODEL_2B:
-           case llm_type::MODEL_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
-           case llm_type::MODEL_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
+           case LLM_TYPE_2B:
+           case LLM_TYPE_9B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head_k))); break;
+           case LLM_TYPE_27B: Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd / n_head))); break;
            default: GGML_ABORT("fatal error");
        };
        cb(Qcur, "Qcur_scaled", il);