allow wildcards for tensor names
parent 054e73e021
commit 6e09a26504

3 changed files with 75 additions and 50 deletions
@@ -263,7 +263,6 @@ static bool read_custom_quant_config(const std::string& filename, llama_model_qu
     while (getline(file, line)) {
         // Skip empty lines and comments
         if (line.empty() || line[0] == '#') continue;
-        printf(" %s\n", line.c_str());

         // default file type
         if (line.find("ftype=") == 0) {
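For orientation, the hunk above touches the loop in read_custom_quant_config that parses quant.cfg style files: full-line '#' comments are skipped, an optional ftype= line sets the default mix, and every other name=value line becomes a per-tensor override. Below is a minimal, hypothetical sketch of that parsing scheme; the names quant_override and read_quant_config_sketch are made up for illustration and do not appear in the patch, which stores the overrides in llama_model_quantize_params instead.

#include <cstdlib>
#include <fstream>
#include <string>
#include <vector>

// Hypothetical container for parsed overrides: one (pattern, ftype) pair per config line.
struct quant_override {
    std::string pattern; // tensor name or wildcard pattern, e.g. "blk.1?.attn*"
    int         ftype;   // numeric ftype code from the table in quant.cfg
};

// Minimal sketch of the config format: '#' comments, an optional "ftype=N"
// default, and "tensor.name=N" override lines, kept in file order.
static bool read_quant_config_sketch(const std::string & filename, int & default_ftype, std::vector<quant_override> & overrides) {
    std::ifstream file(filename);
    if (!file) return false;
    std::string line;
    while (std::getline(file, line)) {
        if (line.empty() || line[0] == '#') continue; // skip blanks and comments
        const size_t eq = line.find('=');
        if (eq == std::string::npos) continue;        // ignore malformed lines
        const std::string key   = line.substr(0, eq);
        const int         value = std::atoi(line.substr(eq + 1).c_str());
        if (key == "ftype") {
            default_ftype = value;                    // default quantization mix
        } else {
            overrides.push_back({ key, value });      // per-tensor (or wildcard) override
        }
    }
    return true;
}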
llama.cpp (24 changes)
@@ -14196,6 +14196,26 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
     return new_size;
 }

+static bool match_string(const std::string& str, const std::string& pattern, uint32_t string_index = 0, uint32_t pattern_index = 0) {
+    // if both index pointers reach the end of str and pattern respectively
+    if (string_index == str.size() && pattern_index == pattern.size()) {
+        return true;
+    }
+
+    // if the pattern character is '*', it can match any sequence of characters
+    if (pattern_index < pattern.size() && pattern[pattern_index] == '*') {
+        // either skip the '*' (advance the pattern index), or let it consume one more character (advance the string index)
+        return match_string(str, pattern, string_index, pattern_index + 1) || (string_index < str.size() && match_string(str, pattern, string_index + 1, pattern_index));
+    }
+
+    // if the current characters match, or the pattern character is '?'
+    if (string_index < str.size() && pattern_index < pattern.size() && (str[string_index] == pattern[pattern_index] || pattern[pattern_index] == '?')) {
+        return match_string(str, pattern, string_index + 1, pattern_index + 1);
+    }
+
+    return false;
+}
+
 static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
     ggml_type default_type;
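The matcher added above is a straightforward recursive glob: '?' consumes exactly one character and '*' tries both matching nothing and swallowing one more character. The small check below illustrates how it behaves on tensor-name patterns; it simply copies the function from the hunk above so it runs standalone, and the example names are only illustrative.

#include <cassert>
#include <cstdint>
#include <string>

// copy of the matcher added in this commit, so the example is self-contained
static bool match_string(const std::string& str, const std::string& pattern, uint32_t string_index = 0, uint32_t pattern_index = 0) {
    if (string_index == str.size() && pattern_index == pattern.size()) {
        return true;
    }
    if (pattern_index < pattern.size() && pattern[pattern_index] == '*') {
        return match_string(str, pattern, string_index, pattern_index + 1) || (string_index < str.size() && match_string(str, pattern, string_index + 1, pattern_index));
    }
    if (string_index < str.size() && pattern_index < pattern.size() && (str[string_index] == pattern[pattern_index] || pattern[pattern_index] == '?')) {
        return match_string(str, pattern, string_index + 1, pattern_index + 1);
    }
    return false;
}

int main() {
    // '?' stands in for exactly one character
    assert( match_string("blk.12.ffn_up.weight", "blk.1?.ffn_up.weight"));
    assert(!match_string("blk.2.ffn_up.weight",  "blk.1?.ffn_up.weight"));
    // '*' matches any run of characters, including an empty one
    assert( match_string("blk.13.attn_q.weight", "blk.1?.attn*"));
    assert( match_string("blk.20.ffn_down.weight", "*down*"));
    // without wildcards the comparison is an exact, full-string match
    assert(!match_string("output.weight", "output"));
    return 0;
}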
@@ -14428,8 +14448,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
        // type as determined by the ftype.
        if(params->override_ftype) {
            for (uint32_t i = 0; i < params->override_ftype->count; ++i) {
-               if (strcmp(params->override_ftype->names[i], tensor->name) == 0) {
-                   //LLAMA_LOG_INFO("\n%s: %s %s ---> %s\n", __func__, tensor->name, ggml_type_name(new_type), ggml_type_name(params->override_ftype->types[i]));
+               if (match_string(tensor->name, params->override_ftype->names[i])) {
+                   // printf("\n -----> %s, %s\n", params->override_ftype->names[i], tensor->name);
                    new_type = params->override_ftype->types[i];
                    break;
                }
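With this hunk, override entries are no longer compared with strcmp; each tensor name is tested against the configured patterns in file order, and the first match wins (note the break). The sketch below restates that selection rule with hypothetical standalone types; override_entry and resolve_ftype are illustrative names only, not part of the patch, and match_string is the function added above.

#include <cstdint>
#include <string>
#include <vector>

// declaration of the matcher added by this commit (the definition lives in llama.cpp)
static bool match_string(const std::string& str, const std::string& pattern, uint32_t string_index = 0, uint32_t pattern_index = 0);

// Hypothetical mirror of the override table: one (pattern, ftype) pair per config line, in file order.
struct override_entry {
    std::string pattern; // e.g. "blk.1?.attn*" or an exact tensor name
    int         ftype;   // numeric ftype code from the quant.cfg table
};

// First matching pattern wins, mirroring the break in the loop above;
// tensors that match nothing keep the default ftype.
static int resolve_ftype(const std::string & tensor_name, const std::vector<override_entry> & overrides, int default_ftype) {
    for (const auto & e : overrides) {
        if (match_string(tensor_name, e.pattern)) {
            return e.ftype;
        }
    }
    return default_ftype;
}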
quant.cfg (100 changes)
@@ -1,54 +1,60 @@
ftype=15
# this defines the default ftype (the quantization mix code,
# that you pass to quantize if you're not using custom mix).
# tensors that are not overriden below will be quantized
# according to this scheme.

blk.12.ffn_down.weight=11
blk.12.ffn_up.weight=11
ftype=7

# allowed values:
# LLAMA_FTYPE_ALL_F32 = 0,
# LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
# // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed
# // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed
# LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors

blk.13.ffn_down.weight=11
blk.13.ffn_up.weight=11
# this defines an override for tensors with names matching
# a given string. filters are processed in order given, and the
# first matching will be used.
# Wildcards are allowed:
# ? single character
# * multiple characters

blk.14.ffn_down.weight=11
blk.14.ffn_up.weight=11

blk.15.ffn_down.weight=11
blk.15.ffn_up.weight=11

blk.16.ffn_up.weight=10
blk.17.ffn_up.weight=10
blk.18.ffn_up.weight=10
blk.19.ffn_up.weight=10
blk.20.ffn_up.weight=10
blk.21.ffn_up.weight=10
blk.22.ffn_up.weight=10
blk.23.ffn_up.weight=10
blk.24.ffn_up.weight=10
blk.25.ffn_up.weight=10

blk.16.ffn_down.weight=10
blk.17.ffn_down.weight=10
blk.18.ffn_down.weight=10
blk.19.ffn_down.weight=10
blk.20.ffn_down.weight=10
blk.21.ffn_down.weight=10
blk.22.ffn_down.weight=10
blk.23.ffn_down.weight=10
blk.24.ffn_down.weight=10
blk.25.ffn_down.weight=10

blk.26.ffn_down.weight=10
blk.26.ffn_up.weight=10

blk.27.ffn_down.weight=11
blk.27.ffn_up.weight=11

blk.28.ffn_down.weight=11
blk.28.ffn_up.weight=11

blk.29.ffn_down.weight=11
blk.29.ffn_up.weight=11

token_embd.weight=21
output.weight=21
blk.10.ffn_up.weight=7
blk.1?.ffn_up.weight=10
blk.2?.ffn_up.weight=10
blk.1?.attn*=23
blk.2?.attn*=23
*down*=14
*gate*=12

# allowed values:
# LLAMA_FTYPE_ALL_F32 = 0,
# LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
# LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
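As a worked example of the new syntax, a stripped-down config relying only on the wildcard rules could look like the following; this is a hypothetical example, not part of the commit, and it assumes the entries are tried top to bottom so the exact blk.10 line shadows the blk.1? wildcard below it.

# hypothetical minimal config illustrating the wildcard rules
# first matching entry wins; unmatched tensors use the default ftype
ftype=7
blk.10.ffn_up.weight=7
blk.1?.ffn_up.weight=10
blk.1?.attn*=23
*down*=14

Under these rules, blk.13.attn_q.weight matches blk.1?.attn* and becomes IQ3_XXS (23), blk.16.ffn_down.weight falls through to *down* and becomes Q4_K_S (14), and a tensor that matches nothing keeps the default ftype 7 (Q8_0).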