ggml : generalize quantize_fns
for simpler FP16 handling (#1237)
* Generalize quantize_fns for simpler FP16 handling * Remove call to ggml_cuda_mul_mat_get_wsize * ci : disable FMA for mac os actions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
8567c76b53
commit
1b107b8550
9 changed files with 172 additions and 548 deletions
|
@ -40,26 +40,26 @@ float array_rmse(const float * a1, const float * a2, size_t n) {
|
|||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
float total_quantization_error(quantize_fns_t & qfns, size_t test_size, const float * test_data) {
|
||||
float total_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
|
||||
qfns.quantize_row_q(test_data, tmp_q.data(), test_size);
|
||||
qfns.dequantize_row_q(tmp_q.data(), tmp_out.data(), test_size);
|
||||
qfns.from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
return array_rmse(test_data, tmp_out.data(), test_size);
|
||||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
float reference_quantization_error(quantize_fns_t & qfns, size_t test_size, const float * test_data) {
|
||||
float reference_quantization_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
std::vector<float> tmp_out_ref(test_size);
|
||||
|
||||
qfns.quantize_row_q(test_data, tmp_q.data(), test_size);
|
||||
qfns.dequantize_row_q(tmp_q.data(), tmp_out.data(), test_size);
|
||||
qfns.from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
|
||||
qfns.quantize_row_q_reference(test_data, tmp_q.data(), test_size);
|
||||
qfns.dequantize_row_q(tmp_q.data(), tmp_out_ref.data(), test_size);
|
||||
qfns.from_float_reference(test_data, tmp_q.data(), test_size);
|
||||
qfns.to_float(tmp_q.data(), tmp_out_ref.data(), test_size);
|
||||
|
||||
return array_rmse(tmp_out.data(), tmp_out_ref.data(), test_size);
|
||||
}
|
||||
|
@ -73,15 +73,17 @@ float dot_product(const float * a1, const float * a2, size_t test_size) {
|
|||
}
|
||||
|
||||
// Total dot product error
|
||||
float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) {
|
||||
float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) {
|
||||
std::vector<uint8_t> tmp_q1(2*test_size);
|
||||
std::vector<uint8_t> tmp_q2(2*test_size);
|
||||
|
||||
qfns.quantize_row_q (test_data1, tmp_q1.data(), test_size);
|
||||
qfns.quantize_row_q_dot(test_data2, tmp_q2.data(), test_size);
|
||||
auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
|
||||
|
||||
qfns.from_float(test_data1, tmp_q1.data(), test_size);
|
||||
vdot.from_float(test_data2, tmp_q2.data(), test_size);
|
||||
|
||||
float result = INFINITY;
|
||||
qfns.vec_dot_q(test_size, &result, tmp_q1.data(), tmp_q2.data());
|
||||
qfns.vec_dot(test_size, &result, tmp_q1.data(), tmp_q2.data());
|
||||
|
||||
const float dot_ref = dot_product(test_data1, test_data2, test_size);
|
||||
|
||||
|
@ -123,9 +125,9 @@ int main(int argc, char * argv[]) {
|
|||
|
||||
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||
ggml_type type = (ggml_type) i;
|
||||
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
||||
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
|
||||
|
||||
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||
if (qfns.from_float && qfns.to_float) {
|
||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||
const float max_quantization_error =
|
||||
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue