Add ability to use new quantization in quantize-stats
This commit is contained in:
parent
8b3d1f977b
commit
92408cd983
1 changed file with 8 additions and 1 deletion
|
@ -1,6 +1,7 @@
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "llama_internal.h"
|
#include "llama_internal.h"
|
||||||
|
#include "ggml_extra.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
@ -29,7 +30,7 @@ struct quantize_stats_params {
|
||||||
std::vector<enum ggml_type> include_types;
|
std::vector<enum ggml_type> include_types;
|
||||||
};
|
};
|
||||||
|
|
||||||
const int64_t SCRATCH_ELEMENTS = 32*32;
|
const int64_t SCRATCH_ELEMENTS = 32*32*256; // So we use multi-threading in a meaningful way in the new quantization
|
||||||
const size_t HISTOGRAM_BUCKETS = 150;
|
const size_t HISTOGRAM_BUCKETS = 150;
|
||||||
const double HISTOGRAM_RANGE = 0.03;
|
const double HISTOGRAM_RANGE = 0.03;
|
||||||
|
|
||||||
|
@ -184,6 +185,7 @@ int main(int argc, char ** argv) {
|
||||||
// read command line
|
// read command line
|
||||||
|
|
||||||
bool invalid_param = false;
|
bool invalid_param = false;
|
||||||
|
bool checkNewQuantization = false;
|
||||||
std::string arg;
|
std::string arg;
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
arg = argv[i];
|
arg = argv[i];
|
||||||
|
@ -232,6 +234,8 @@ int main(int argc, char ** argv) {
|
||||||
fprintf(stderr, "error: %s not in list of types\n", argv[i]);
|
fprintf(stderr, "error: %s not in list of types\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
}
|
}
|
||||||
|
} else if (arg == "-nq" || arg == "--new-quantization") {
|
||||||
|
checkNewQuantization = true;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||||
quantize_stats_print_usage(argc, argv);
|
quantize_stats_print_usage(argc, argv);
|
||||||
|
@ -307,6 +311,9 @@ int main(int argc, char ** argv) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
|
||||||
|
if (i < 2 && checkNewQuantization) {
|
||||||
|
qfns.quantize_row_q = i == 0 ? kQuantizeQ4_0 : kQuantizeQ4_1;
|
||||||
|
}
|
||||||
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
|
||||||
if (params.verbose) {
|
if (params.verbose) {
|
||||||
printf("testing %s ...\n", type_strs[i]);
|
printf("testing %s ...\n", type_strs[i]);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue