quantize-stats : fix test + add it to Makefile default
This commit is contained in:
parent
3b894ec657
commit
19e7a6575d
4 changed files with 11 additions and 5 deletions
2
Makefile
2
Makefile
|
@ -133,7 +133,7 @@ $(info I CC: $(CCV))
|
||||||
$(info I CXX: $(CXXV))
|
$(info I CXX: $(CXXV))
|
||||||
$(info )
|
$(info )
|
||||||
|
|
||||||
default: main quantize perplexity embedding
|
default: main quantize quantize-stats perplexity embedding
|
||||||
|
|
||||||
#
|
#
|
||||||
# Build library
|
# Build library
|
||||||
|
|
|
@ -16,6 +16,9 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
static const char * type_strs[] = { "f32", "f16", "q4_0", "q4_1", "q8_0", "i8", "i16", "i32", };
|
||||||
|
static_assert(sizeof(type_strs) == GGML_TYPE_COUNT * sizeof(char *), "Incomplete type list");
|
||||||
|
|
||||||
struct quantize_stats_params {
|
struct quantize_stats_params {
|
||||||
std::string model = "models/7B/ggml-model-f16.bin";
|
std::string model = "models/7B/ggml-model-f16.bin";
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
|
|
10
ggml.c
10
ggml.c
|
@ -7151,14 +7151,16 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
||||||
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
|
||||||
[GGML_TYPE_Q4_0] = {
|
[GGML_TYPE_Q4_0] = {
|
||||||
.dequantize_row_q = dequantize_row_q4_0,
|
.dequantize_row_q = dequantize_row_q4_0,
|
||||||
.quantize_row_q = quantize_row_q8_0,
|
.quantize_row_q = quantize_row_q4_0,
|
||||||
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
|
||||||
|
.quantize_row_q_dot = quantize_row_q8_0,
|
||||||
.vec_dot_q = ggml_vec_dot_q4_0_q8_0,
|
.vec_dot_q = ggml_vec_dot_q4_0_q8_0,
|
||||||
},
|
},
|
||||||
[GGML_TYPE_Q4_1] = {
|
[GGML_TYPE_Q4_1] = {
|
||||||
.dequantize_row_q = dequantize_row_q4_1,
|
.dequantize_row_q = dequantize_row_q4_1,
|
||||||
.quantize_row_q = quantize_row_q4_1,
|
.quantize_row_q = quantize_row_q4_1,
|
||||||
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
|
||||||
|
.quantize_row_q_dot = quantize_row_q8_0,
|
||||||
.vec_dot_q = ggml_vec_dot_q4_1,
|
.vec_dot_q = ggml_vec_dot_q4_1,
|
||||||
},
|
},
|
||||||
// TODO: GGML_TYPE_Q8_0
|
// TODO: GGML_TYPE_Q8_0
|
||||||
|
@ -7217,8 +7219,8 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
||||||
GGML_ASSERT(ne3 == ne13);
|
GGML_ASSERT(ne3 == ne13);
|
||||||
|
|
||||||
const enum ggml_type type = src0->type;
|
const enum ggml_type type = src0->type;
|
||||||
quantize_row_q_t const quantize_row_q = quantize_fns[type].quantize_row_q;
|
quantize_row_q_t const quantize_row_q_dot = quantize_fns[type].quantize_row_q_dot;
|
||||||
vec_dot_q_t const vec_dot_q = quantize_fns[type].vec_dot_q;
|
vec_dot_q_t const vec_dot_q = quantize_fns[type].vec_dot_q;
|
||||||
|
|
||||||
// we don't support permuted src0 or src1
|
// we don't support permuted src0 or src1
|
||||||
GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
|
GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
|
||||||
|
@ -7292,7 +7294,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
|
||||||
for (int64_t i13 = 0; i13 < ne13; ++i13) {
|
for (int64_t i13 = 0; i13 < ne13; ++i13) {
|
||||||
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
||||||
for (int64_t i11 = 0; i11 < ne11; ++i11) {
|
for (int64_t i11 = 0; i11 < ne11; ++i11) {
|
||||||
quantize_row_q((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
|
quantize_row_q_dot((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
|
||||||
wdata += row_size;
|
wdata += row_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
1
ggml.h
1
ggml.h
|
@ -837,6 +837,7 @@ typedef struct {
|
||||||
dequantize_row_q_t dequantize_row_q;
|
dequantize_row_q_t dequantize_row_q;
|
||||||
quantize_row_q_t quantize_row_q;
|
quantize_row_q_t quantize_row_q;
|
||||||
quantize_row_q_t quantize_row_q_reference;
|
quantize_row_q_t quantize_row_q_reference;
|
||||||
|
quantize_row_q_t quantize_row_q_dot;
|
||||||
vec_dot_q_t vec_dot_q;
|
vec_dot_q_t vec_dot_q;
|
||||||
} quantize_fns_t;
|
} quantize_fns_t;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue