custom quantization schemas
This commit is contained in:
parent
4e96a812b3
commit
31e2f5668c
1 changed file with 115 additions and 0 deletions
115
quant.cfg
Normal file
115
quant.cfg
Normal file
|
@@ -0,0 +1,115 @@
|
||||||
|
ftype=15
|
||||||
|
|
||||||
|
blk.12.ffn_down.weight=11
|
||||||
|
blk.12.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.13.ffn_down.weight=11
|
||||||
|
blk.13.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.14.ffn_down.weight=11
|
||||||
|
blk.14.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.15.ffn_down.weight=11
|
||||||
|
blk.15.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.16.ffn_up.weight=10
|
||||||
|
blk.17.ffn_up.weight=10
|
||||||
|
blk.18.ffn_up.weight=10
|
||||||
|
blk.19.ffn_up.weight=10
|
||||||
|
blk.20.ffn_up.weight=10
|
||||||
|
blk.21.ffn_up.weight=10
|
||||||
|
blk.22.ffn_up.weight=10
|
||||||
|
blk.23.ffn_up.weight=10
|
||||||
|
blk.24.ffn_up.weight=10
|
||||||
|
blk.25.ffn_up.weight=10
|
||||||
|
|
||||||
|
blk.16.ffn_down.weight=10
|
||||||
|
blk.17.ffn_down.weight=10
|
||||||
|
blk.18.ffn_down.weight=10
|
||||||
|
blk.19.ffn_down.weight=10
|
||||||
|
blk.20.ffn_down.weight=10
|
||||||
|
blk.21.ffn_down.weight=10
|
||||||
|
blk.22.ffn_down.weight=10
|
||||||
|
blk.23.ffn_down.weight=10
|
||||||
|
blk.24.ffn_down.weight=10
|
||||||
|
blk.25.ffn_down.weight=10
|
||||||
|
|
||||||
|
blk.26.ffn_down.weight=10
|
||||||
|
blk.26.ffn_up.weight=10
|
||||||
|
|
||||||
|
blk.27.ffn_down.weight=11
|
||||||
|
blk.27.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.28.ffn_down.weight=11
|
||||||
|
blk.28.ffn_up.weight=11
|
||||||
|
|
||||||
|
blk.29.ffn_down.weight=11
|
||||||
|
blk.29.ffn_up.weight=11
|
||||||
|
|
||||||
|
token_embd.weight=21
|
||||||
|
output.weight=21
|
||||||
|
|
||||||
|
# LLAMA_FTYPE_ALL_F32 = 0,
|
||||||
|
# LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
||||||
|
# // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
|
||||||
|
# // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
|
||||||
|
# LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors
|
||||||
|
|
||||||
|
# GGML_TYPE_F32 = 0,
|
||||||
|
# GGML_TYPE_F16 = 1,
|
||||||
|
# GGML_TYPE_Q4_0 = 2,
|
||||||
|
# GGML_TYPE_Q4_1 = 3,
|
||||||
|
# // GGML_TYPE_Q4_2 = 4, support has been removed
|
||||||
|
# // GGML_TYPE_Q4_3 = 5, support has been removed
|
||||||
|
# GGML_TYPE_Q5_0 = 6,
|
||||||
|
# GGML_TYPE_Q5_1 = 7,
|
||||||
|
# GGML_TYPE_Q8_0 = 8,
|
||||||
|
# GGML_TYPE_Q8_1 = 9,
|
||||||
|
# GGML_TYPE_Q2_K = 10,
|
||||||
|
# GGML_TYPE_Q3_K = 11,
|
||||||
|
# GGML_TYPE_Q4_K = 12,
|
||||||
|
# GGML_TYPE_Q5_K = 13,
|
||||||
|
# GGML_TYPE_Q6_K = 14,
|
||||||
|
# GGML_TYPE_Q8_K = 15,
|
||||||
|
# GGML_TYPE_IQ2_XXS = 16,
|
||||||
|
# GGML_TYPE_IQ2_XS = 17,
|
||||||
|
# GGML_TYPE_IQ3_XXS = 18,
|
||||||
|
# GGML_TYPE_IQ1_S = 19,
|
||||||
|
# GGML_TYPE_IQ4_NL = 20,
|
||||||
|
# GGML_TYPE_IQ3_S = 21,
|
||||||
|
# GGML_TYPE_IQ2_S = 22,
|
||||||
|
# GGML_TYPE_IQ4_XS = 23,
|
||||||
|
# GGML_TYPE_I8 = 24,
|
||||||
|
# GGML_TYPE_I16 = 25,
|
||||||
|
# GGML_TYPE_I32 = 26,
|
||||||
|
# GGML_TYPE_I64 = 27,
|
||||||
|
# GGML_TYPE_F64 = 28,
|
||||||
|
# GGML_TYPE_IQ1_M = 29,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue