test-quantize: fix for q8_0 intermediates

This commit is contained in:
Håkon H. Hitland 2023-04-16 00:37:16 +02:00
parent 6071228818
commit e95a8336d5
2 changed files with 21 additions and 5 deletions

View file

@ -12,7 +12,6 @@
const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001; const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001;
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002; const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002;
// TODO: check why q4_1 is high
const float MAX_DOT_PRODUCT_ERROR = 0.02; const float MAX_DOT_PRODUCT_ERROR = 0.02;
const char* RESULT_STR[] = {"ok", "FAILED"}; const char* RESULT_STR[] = {"ok", "FAILED"};
@ -71,10 +70,10 @@ float dot_product(const float * a1, const float * a2, size_t test_size) {
// Total dot product error // Total dot product error
float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) { float dot_product_error(quantize_fns_t & qfns, size_t test_size, const float * test_data1, const float *test_data2) {
std::vector<uint8_t> tmp_q1(test_size); std::vector<uint8_t> tmp_q1(test_size);
std::vector<uint8_t> tmp_q2(test_size); std::vector<uint8_t> tmp_q2(test_size*2);
qfns.quantize_row_q(test_data1, tmp_q1.data(), test_size); qfns.quantize_row_q(test_data1, tmp_q1.data(), test_size);
qfns.quantize_row_q(test_data2, tmp_q2.data(), test_size); qfns.quantize_row_q_dot(test_data2, tmp_q2.data(), test_size);
float result = INFINITY; float result = INFINITY;
qfns.vec_dot_q(test_size, &result, tmp_q1.data(), tmp_q2.data()); qfns.vec_dot_q(test_size, &result, tmp_q1.data(), tmp_q2.data());

View file

@ -30,6 +30,7 @@ struct quantize_perf_params {
bool op_quantize_row_q_reference = false; bool op_quantize_row_q_reference = false;
bool op_quantize_row_q = false; bool op_quantize_row_q = false;
bool op_dequantize_row_q = false; bool op_dequantize_row_q = false;
bool op_quantize_row_q_dot = false;
bool op_vec_dot_q = false; bool op_vec_dot_q = false;
}; };
@ -147,6 +148,8 @@ int main(int argc, char * argv[]) {
params.op_quantize_row_q = true; params.op_quantize_row_q = true;
} else if (op == "dequantize_row_q") { } else if (op == "dequantize_row_q") {
params.op_dequantize_row_q = true; params.op_dequantize_row_q = true;
} else if (op == "quantize_row_q_dot") {
params.op_quantize_row_q_dot = true;
} else if (op == "vec_dot_q") { } else if (op == "vec_dot_q") {
params.op_vec_dot_q = true; params.op_vec_dot_q = true;
} else { } else {
@ -184,8 +187,8 @@ int main(int argc, char * argv[]) {
if (params.test_sizes.empty()) { if (params.test_sizes.empty()) {
params.test_sizes.push_back(L1_SIZE); params.test_sizes.push_back(L1_SIZE);
} }
if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_vec_dot_q)) { if (!(params.op_quantize_row_q_reference || params.op_quantize_row_q || params.op_dequantize_row_q || params.op_quantize_row_q_dot || params.op_vec_dot_q)) {
params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_vec_dot_q = true; params.op_quantize_row_q_reference = params.op_quantize_row_q = params.op_dequantize_row_q = params.op_quantize_row_q_dot = params.op_vec_dot_q = true;
} }
std::sort(params.test_sizes.begin(), params.test_sizes.end()); std::sort(params.test_sizes.begin(), params.test_sizes.end());
@ -268,6 +271,20 @@ int main(int argc, char * argv[]) {
printf("\n"); printf("\n");
} }
if (params.op_quantize_row_q_dot) {
printf(" quantize_row_q_dot\n");
for (size_t size : params.test_sizes) {
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
auto quantize_fn = [&](void ) {
qfns.quantize_row_q_dot(test_data1, test_q1, size);
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn);
}
printf("\n");
}
if (params.op_vec_dot_q) { if (params.op_vec_dot_q) {
printf(" vec_dot_q\n"); printf(" vec_dot_q\n");
qfns.quantize_row_q(test_data1, test_q1, largest); qfns.quantize_row_q(test_data1, test_q1, largest);