ggml : add TQ1_0 and TQ2_0 ternary quantization types

This commit is contained in:
Francis Couture-Harpin 2024-07-30 17:55:54 -04:00
parent 79a278e922
commit 77b8f84ae7
10 changed files with 563 additions and 16 deletions

View file

@ -15,13 +15,13 @@
constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_BITNET = 0.015625f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_TERNARY = 0.015625f; // TODO: change to 0.01f
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f;
constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
constexpr float MAX_DOT_PRODUCT_ERROR_LOWBIT = 0.04f;
constexpr float MAX_DOT_PRODUCT_ERROR_BITNET = 0.5f;
constexpr float MAX_DOT_PRODUCT_ERROR_TERNARY = 0.5f; // TODO: change to 0.15f
static const char* RESULT_STR[] = {"ok", "FAILED"};
@ -146,8 +146,10 @@ int main(int argc, char * argv[]) {
if (qfns.from_float && qfns.to_float) {
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
const float max_quantization_error =
type == GGML_TYPE_Q1_3 ? MAX_QUANTIZATION_TOTAL_ERROR_BITNET :
type == GGML_TYPE_Q2_2 ? MAX_QUANTIZATION_TOTAL_ERROR_BITNET :
type == GGML_TYPE_Q1_3 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
type == GGML_TYPE_Q2_2 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
type == GGML_TYPE_TQ1_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
type == GGML_TYPE_TQ2_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
type == GGML_TYPE_IQ2_S ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
@ -170,8 +172,8 @@ int main(int argc, char * argv[]) {
const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS ||
type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S
? MAX_DOT_PRODUCT_ERROR_LOWBIT
: type == GGML_TYPE_Q2_2 || type == GGML_TYPE_Q1_3
? MAX_DOT_PRODUCT_ERROR_BITNET
: type == GGML_TYPE_Q2_2 || type == GGML_TYPE_Q1_3 || type == GGML_TYPE_TQ1_0 || type == GGML_TYPE_TQ2_0
? MAX_DOT_PRODUCT_ERROR_TERNARY
: MAX_DOT_PRODUCT_ERROR;
failed = !(vec_dot_error < max_allowed_error);
num_failed += failed;