Per convention, all QX_K quantizations use Q5_K for output.weight
This commit is contained in:
parent b835d0f49f
commit 5c5191ab68
2 changed files with 3 additions and 0 deletions
ggml.c
@@ -3497,6 +3497,7 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = sizeof(block_q5_1),
     [GGML_TYPE_Q8_0] = sizeof(block_q8_0),
     [GGML_TYPE_Q8_1] = sizeof(block_q8_1),
+    [GGML_TYPE_Q3_K] = sizeof(block_q3_K),
     [GGML_TYPE_Q4_K] = sizeof(block_q4_K),
     [GGML_TYPE_Q5_K] = sizeof(block_q5_K),
     [GGML_TYPE_Q6_K] = sizeof(block_q6_K),
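Each entry in GGML_TYPE_SIZE records the byte size of one quantization block, so registering a new type means adding its block size here; ggml derives a tensor's storage by dividing the element count by the block's element count and multiplying by the block's byte size. A minimal sketch of that arithmetic, assuming illustrative constants for Q5_K (QK_K and the byte count below are stand-ins, not the library's actual symbols):

#include <cstdio>
#include <cstddef>

// Illustrative stand-ins for ggml's per-type constants (assumptions,
// not the real macros): a Q5_K block packs QK_K weights, and each
// packed block occupies Q5_K_BLOCK_BYTES bytes on disk.
constexpr size_t QK_K             = 256; // weights per k-quant block
constexpr size_t Q5_K_BLOCK_BYTES = 176; // illustrative sizeof(block_q5_K)

// Bytes needed to store n elements in Q5_K: the same shape as looking
// up GGML_TYPE_SIZE[type] and scaling by the elements-per-block count.
size_t q5_k_nbytes(size_t nelements) {
    return nelements / QK_K * Q5_K_BLOCK_BYTES;
}

int main() {
    // e.g. a 4096 x 32000 output matrix
    const size_t ne = 4096u * 32000u;
    std::printf("Q5_K size = %8.3f MB\n", q5_k_nbytes(ne) / 1024.0 / 1024.0);
    return 0;
}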
@@ -3537,6 +3538,7 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = true,
     [GGML_TYPE_Q8_0] = true,
     [GGML_TYPE_Q8_1] = true,
+    [GGML_TYPE_Q3_K] = true,
     [GGML_TYPE_Q4_K] = true,
     [GGML_TYPE_Q5_K] = true,
     [GGML_TYPE_Q6_K] = true,
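GGML_IS_QUANTIZED is the companion table: code paths that only handle plain float data consult it before touching a tensor, via the ggml_is_quantized() lookup helper. A minimal sketch of the same pattern, with a hypothetical, much smaller type enum in place of ggml_type:

#include <cstdio>

// Hypothetical miniature of ggml's type enum; the real one has many
// more members and ends with GGML_TYPE_COUNT.
enum Type { TYPE_F32, TYPE_Q8_0, TYPE_Q3_K, TYPE_Q5_K, TYPE_COUNT };

// Parallel flag table, same shape as GGML_IS_QUANTIZED: one entry per
// type index, true only for the block-quantized formats.
static const bool IS_QUANTIZED[TYPE_COUNT] = {
    /* TYPE_F32  */ false,
    /* TYPE_Q8_0 */ true,
    /* TYPE_Q3_K */ true,
    /* TYPE_Q5_K */ true,
};

// Thin accessor, analogous to ggml_is_quantized().
static bool is_quantized(Type t) { return IS_QUANTIZED[t]; }

int main() {
    std::printf("Q5_K quantized: %d\n", is_quantized(TYPE_Q5_K)); // 1
    std::printf("F32  quantized: %d\n", is_quantized(TYPE_F32));  // 0
    return 0;
}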
llama.cpp
@@ -2131,6 +2131,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
            printf("size = %8.3f MB\n", tensor.size/1024.0/1024.0);
        } else {
            new_type = quantized_type;
+           if (tensor.name == "output.weight") new_type = GGML_TYPE_Q6_K;
            float * f32_data;
            size_t nelements = tensor.ne.at(0) * tensor.ne.at(1);
            llama_buffer f32_conv_buf;
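The llama.cpp hunk is the point of the commit: every tensor defaults to the requested k-quant type, and output.weight alone is overridden with a higher-precision k-quant, since the final projection is especially sensitive to quantization error. A minimal sketch of that per-tensor override loop, with hypothetical tensor names and a trimmed-down type enum:

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical miniature of the type selection inside
// llama_model_quantize_internal(): one requested type for the whole
// model, with a single per-tensor override for output.weight.
enum Type { Q4_K, Q5_K, Q6_K };

static const char * type_name(Type t) {
    switch (t) {
        case Q4_K: return "Q4_K";
        case Q5_K: return "Q5_K";
        case Q6_K: return "Q6_K";
    }
    return "?";
}

int main() {
    Type quantized_type = Q4_K; // what the user asked for
    std::vector<std::string> tensor_names = {
        "tok_embeddings.weight", "layers.0.attention.wq.weight", "output.weight",
    };
    for (const auto & name : tensor_names) {
        Type new_type = quantized_type;
        // Same shape as the added diff line: the output tensor is kept
        // at a higher-precision k-quant regardless of the requested type.
        if (name == "output.weight") new_type = Q6_K;
        std::printf("%-32s -> %s\n", name.c_str(), type_name(new_type));
    }
    return 0;
}

Keeping the override as a single string comparison inside the existing loop adds no new configuration surface: the convention lives in one line of the quantizer.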