ggml : remove old quantization functions (#5942)

* ggml : remove old quantization functions

ggml-ci

* ggml : simplify ggml_quantize_chunk

ggml-ci

* ggml : restrict correctness

ggml-ci

* ggml : remove hist data from the quantization API

ggml-ci

* tests : remove hist usage in test-backend-ops

ggml-ci

* vulkan : remove hist and fix typo
This commit is contained in:
Georgi Gerganov 2024-03-09 15:53:59 +02:00 committed by GitHub
parent 97c09585d6
commit 5b09797321
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 131 additions and 568 deletions

View file

@ -4102,45 +4102,7 @@ static void ggml_vk_test_transfer(ggml_backend_vk_context * ctx, size_t ne, bool
}
// Converts `ne` float values read from `from` into the `quant` format and
// writes the result to `to`.
//
// All per-type dispatch is delegated to ggml_quantize_chunk. The previous body
// additionally ran a switch over the legacy ggml_quantize_<type>() functions,
// filling a histogram buffer that was never read; that work was redundant
// because this call writes the same destination afterwards, and the commit
// message says those functions and the hist data are removed from the API.
//
// NOTE(review): the removed switch copied GGML_TYPE_F32 verbatim and hit
// GGML_ASSERT(false) for unsupported types. This version relies on
// ggml_quantize_chunk for both cases - confirm against ggml.h.
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
    // Presumably: start offset 0, a single row of `ne` elements, and no
    // importance matrix (nullptr) - verify against the declaration in ggml.h.
    ggml_quantize_chunk(quant, from, to, 0, 1, ne, nullptr);
}
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {