By changing the priority between --token_embedding_type, --output_tensor_type and --pure, it is friendlier for users to define their own quantization strategy
This commit is contained in:
parent
ba58993152
commit
d94eaa69d1
3 changed files with 33 additions and 5 deletions
|
@ -101,9 +101,9 @@ static void usage(const char * executable) {
|
||||||
printf(" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n");
|
printf(" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n");
|
||||||
printf(" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n");
|
printf(" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n");
|
||||||
printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n");
|
printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n");
|
||||||
printf(" --keep-split: will generate quatized model in the same shards as input");
|
printf(" --keep-split: will generate quatized model in the same shards as input\n");
|
||||||
printf(" --override-kv KEY=TYPE:VALUE\n");
|
printf(" --override-kv KEY=TYPE:VALUE\n");
|
||||||
printf(" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");
|
printf(" : Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n\n");
|
||||||
printf("Note: --include-weights and --exclude-weights cannot be used together\n");
|
printf("Note: --include-weights and --exclude-weights cannot be used together\n");
|
||||||
printf("\nAllowed quantization types:\n");
|
printf("\nAllowed quantization types:\n");
|
||||||
for (auto & it : QUANT_OPTIONS) {
|
for (auto & it : QUANT_OPTIONS) {
|
||||||
|
|
|
@ -15691,15 +15691,15 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
new_type = default_type;
|
new_type = default_type;
|
||||||
|
|
||||||
// get more optimal quantization type based on the tensor shape, layer, etc.
|
// get more optimal quantization type based on the tensor shape, layer, etc.
|
||||||
if (!params->pure && ggml_is_quantized(default_type)) {
|
|
||||||
new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
|
|
||||||
}
|
|
||||||
if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
|
if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
|
||||||
new_type = params->token_embedding_type;
|
new_type = params->token_embedding_type;
|
||||||
}
|
}
|
||||||
if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
|
if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
|
||||||
new_type = params->output_tensor_type;
|
new_type = params->output_tensor_type;
|
||||||
}
|
}
|
||||||
|
if (!params->pure && ggml_is_quantized(default_type)) {
|
||||||
|
new_type = llama_tensor_get_type(qs, new_type, tensor, ftype);
|
||||||
|
}
|
||||||
|
|
||||||
// If we've decided to quantize to the same type the tensor is already
|
// If we've decided to quantize to the same type the tensor is already
|
||||||
// in then there's nothing to do.
|
// in then there's nothing to do.
|
||||||
|
|
28
Normal file
28
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
export PATH=/usr/local/cuda-12.2/bin${PATH:+:${PATH}}
|
||||||
|
export LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
|
||||||
|
export PATH=/local/mnt/workspace/miniconda3/bin:$PATH
|
||||||
|
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||||
|
:
|
||||||
|
:
|
||||||
|
# >>> conda initialize >>>
|
||||||
|
# !! Contents within this block are managed by 'conda init' !!
|
||||||
|
__conda_setup="$('/local/mnt/workspace/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
eval "$__conda_setup"
|
||||||
|
else
|
||||||
|
if [ -f "/local/mnt/workspace/miniconda3/etc/profile.d/conda.sh" ]; then
|
||||||
|
. "/local/mnt/workspace/miniconda3/etc/profile.d/conda.sh"
|
||||||
|
else
|
||||||
|
export PATH="/local/mnt/workspace/miniconda3/bin:$PATH"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
unset __conda_setup
|
||||||
|
# <<< conda initialize <<<
|
||||||
|
|
||||||
|
# proxy setting
|
||||||
|
export http_proxy=http://secure-proxy-aprdc2-1.qualcomm.com:9090/
|
||||||
|
export https_proxy=http://secure-proxy-aprdc2-1.qualcomm.com:9090/
|
||||||
|
export ftp_proxy=http://secure-proxy-aprdc2-1.qualcomm.com:9090/
|
||||||
|
|
||||||
|
# HF cache
|
||||||
|
export HF_HOME=/local/mnt/zzx/.cache/huggingface
|
Loading…
Add table
Add a link
Reference in a new issue