Fix quantization error test
We cannot reasonably expect an RMSE < 0.002 for the 2- and 3-bit quantization variants.
This commit is contained in:
parent
8f5d42db9b
commit
6ef13823b8
1 changed file with 6 additions and 1 deletion
|
@@ -12,6 +12,8 @@
|
||||||
|
|
||||||
const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001;
|
const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001;
|
||||||
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002;
|
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002;
|
||||||
|
const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075;
|
||||||
|
const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040;
|
||||||
const float MAX_DOT_PRODUCT_ERROR = 0.02;
|
const float MAX_DOT_PRODUCT_ERROR = 0.02;
|
||||||
|
|
||||||
const char* RESULT_STR[] = {"ok", "FAILED"};
|
const char* RESULT_STR[] = {"ok", "FAILED"};
|
||||||
|
@@ -122,7 +124,10 @@ int main(int argc, char * argv[]) {
|
||||||
|
|
||||||
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||||
failed = !(total_error < MAX_QUANTIZATION_TOTAL_ERROR);
|
const float max_quantization_error =
|
||||||
|
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
|
||||||
|
type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS : MAX_QUANTIZATION_TOTAL_ERROR;
|
||||||
|
failed = !(total_error < max_quantization_error);
|
||||||
num_failed += failed;
|
num_failed += failed;
|
||||||
if (failed || verbose) {
|
if (failed || verbose) {
|
||||||
printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
|
printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue