Update llama.cpp - Embeddings and output tensors strategy.

This commit is contained in:
Nexesenex 2024-03-25 20:37:11 +01:00 committed by GitHub
parent 51ff04e77e
commit 1c4da5ddac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -12447,6 +12447,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
// Fragment of llama_tensor_get_type(): per-ftype override of the quant type
// chosen for one tensor (presumably the output/embeddings tensors, per the
// commit title -- TODO confirm against the full function).
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
// Falcon, or a row size nx not divisible by the QK_K super-block size,
// cannot use k-quants here: fall back to plain Q8_0.
new_type = GGML_TYPE_Q8_0;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS) {
// IQ1_XS: MoE models (>= 2 experts) get the larger Q5_K; dense models IQ4_XS.
if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
else new_type = GGML_TYPE_IQ4_XS;
}
// Remaining low-bit IQ ftypes are grouped up to Q5_K (chain continues past this hunk).
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
new_type = GGML_TYPE_Q5_K;
@ -12462,6 +12466,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
// Lowest-bit ftypes (IQ2_XXS / IQ2_XS / IQ1_S): bump this tensor up to Q2_K.
if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) {
new_type = GGML_TYPE_Q2_K;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS) {
// BUG FIX: the second assignment was unconditional, so the preceding
// n_expert >= 2 assignment was always overwritten and every IQ1_XS model
// ended up with IQ2_S here. Mirror the if/else structure the same commit
// uses for IQ1_XS in the earlier hunk: MoE (>= 2 experts) -> Q2_K,
// dense models -> IQ2_S.
if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q2_K;
else new_type = GGML_TYPE_IQ2_S;
}
// IQ2_S / IQ2_M: step this tensor up to IQ3_S.
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
new_type = GGML_TYPE_IQ3_S;
}