quantize-stats: add option to test against reference quantization
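Expose the reference quantization implementations through a new `quantize_row_q_reference` member of `quantize_fns_t`, and add a `-r` / `--reference` option to quantize-stats that runs the round-trip tests against them instead of the optimized `quantize_row_q` path.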
This commit is contained in:
parent
d4915074c4
commit
63cfa43200
3 changed files with 21 additions and 7 deletions
|
@ -24,6 +24,7 @@ struct quantize_stats_params {
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
bool per_layer_stats = false;
|
bool per_layer_stats = false;
|
||||||
bool print_histogram = false;
|
bool print_histogram = false;
|
||||||
|
bool reference = false;
|
||||||
std::vector<std::string> include_layers;
|
std::vector<std::string> include_layers;
|
||||||
std::vector<std::string> exclude_layers;
|
std::vector<std::string> exclude_layers;
|
||||||
std::vector<enum ggml_type> include_types;
|
std::vector<enum ggml_type> include_types;
|
||||||
|
@ -49,6 +50,8 @@ void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
||||||
fprintf(stderr, " -h, --help show this help message and exit\n");
|
fprintf(stderr, " -h, --help show this help message and exit\n");
|
||||||
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
||||||
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
||||||
|
fprintf(stderr, " -r, --reference\n");
|
||||||
|
fprintf(stderr, " use reference implementation (default: false)\n");
|
||||||
fprintf(stderr, " -v, --verbose\n");
|
fprintf(stderr, " -v, --verbose\n");
|
||||||
fprintf(stderr, " verbose output (default: false)\n");
|
fprintf(stderr, " verbose output (default: false)\n");
|
||||||
fprintf(stderr, " -p, --per-layer-stats\n");
|
fprintf(stderr, " -p, --per-layer-stats\n");
|
||||||
|
@ -135,6 +138,7 @@ void test_roundtrip_on_layer(
|
||||||
std::string & name,
|
std::string & name,
|
||||||
bool print_layer_stats,
|
bool print_layer_stats,
|
||||||
const quantize_fns_t & qfns,
|
const quantize_fns_t & qfns,
|
||||||
|
bool use_reference,
|
||||||
const ggml_tensor * layer,
|
const ggml_tensor * layer,
|
||||||
float * input_scratch,
|
float * input_scratch,
|
||||||
char *quantized_scratch,
|
char *quantized_scratch,
|
||||||
|
@ -156,7 +160,11 @@ void test_roundtrip_on_layer(
|
||||||
input_scratch = ggml_get_data_f32(layer) + offset;
|
input_scratch = ggml_get_data_f32(layer) + offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size);
|
if (use_reference) {
|
||||||
|
qfns.quantize_row_q_reference(input_scratch, quantized_scratch, chunk_size);
|
||||||
|
} else {
|
||||||
|
qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size);
|
||||||
|
}
|
||||||
qfns.dequantize_row_q(quantized_scratch, output_scratch, chunk_size);
|
qfns.dequantize_row_q(quantized_scratch, output_scratch, chunk_size);
|
||||||
|
|
||||||
update_error_stats(chunk_size, input_scratch, output_scratch, total_error);
|
update_error_stats(chunk_size, input_scratch, output_scratch, total_error);
|
||||||
|
@ -184,6 +192,8 @@ int main(int argc, char ** argv) {
|
||||||
if (arg == "-h" || arg == "--help") {
|
if (arg == "-h" || arg == "--help") {
|
||||||
quantize_stats_print_usage(argc, argv);
|
quantize_stats_print_usage(argc, argv);
|
||||||
exit(0);
|
exit(0);
|
||||||
|
} else if (arg == "-r" || arg == "--reference") {
|
||||||
|
params.reference = true;
|
||||||
} else if (arg == "-v") {
|
} else if (arg == "-v") {
|
||||||
params.verbose = true;
|
params.verbose = true;
|
||||||
} else if (arg == "-p" || arg == "--per-layer-stats") {
|
} else if (arg == "-p" || arg == "--per-layer-stats") {
|
||||||
|
@ -320,6 +330,7 @@ int main(int argc, char ** argv) {
|
||||||
layer_name,
|
layer_name,
|
||||||
params.per_layer_stats,
|
params.per_layer_stats,
|
||||||
qfns,
|
qfns,
|
||||||
|
params.reference,
|
||||||
kv_tensor.second,
|
kv_tensor.second,
|
||||||
input_scratch.data(),
|
input_scratch.data(),
|
||||||
quantized_scratch.data(),
|
quantized_scratch.data(),
|
||||||
|
|
14
ggml.c
14
ggml.c
|
@ -6499,14 +6499,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
||||||
|
|
||||||
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_Q4_0] = {
|
[GGML_TYPE_Q4_0] = {
|
||||||
.dequantize_row_q = dequantize_row_q4_0,
|
.dequantize_row_q = dequantize_row_q4_0,
|
||||||
.quantize_row_q = quantize_row_q4_0,
|
.quantize_row_q = quantize_row_q4_0,
|
||||||
.vec_dot_q = ggml_vec_dot_q4_0,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
|
||||||
|
.vec_dot_q = ggml_vec_dot_q4_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_1] = {
|
[GGML_TYPE_Q4_1] = {
|
||||||
.dequantize_row_q = dequantize_row_q4_1,
|
.dequantize_row_q = dequantize_row_q4_1,
|
||||||
.quantize_row_q = quantize_row_q4_1,
|
.quantize_row_q = quantize_row_q4_1,
|
||||||
.vec_dot_q = ggml_vec_dot_q4_1,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
|
||||||
|
.vec_dot_q = ggml_vec_dot_q4_1,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@ typedef void (*vec_dot_q_t)(const int n, float * restrict s, const void * restri
|
||||||
typedef struct {
|
typedef struct {
|
||||||
dequantize_row_q_t dequantize_row_q;
|
dequantize_row_q_t dequantize_row_q;
|
||||||
quantize_row_q_t quantize_row_q;
|
quantize_row_q_t quantize_row_q;
|
||||||
|
quantize_row_q_t quantize_row_q_reference;
|
||||||
vec_dot_q_t vec_dot_q;
|
vec_dot_q_t vec_dot_q;
|
||||||
} quantize_fns_t;
|
} quantize_fns_t;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue