iq3_xxs: failing tests

This time the dot product accuracy did find an actual bug
in the AVX2 implementation.
This commit is contained in:
Iwan Kawrakow 2024-01-29 18:21:04 +02:00
parent 62623434af
commit fe2160eec9
3 changed files with 8 additions and 3 deletions

View file

@ -8686,10 +8686,10 @@ void ggml_vec_dot_iq3_xxs_q8_K(const int n, float * restrict s, const void * res
const __m256i q8_1 = _mm256_loadu_si256((const __m256i *)q8); q8 += 32;
const __m256i q8_2 = _mm256_loadu_si256((const __m256i *)q8); q8 += 32;
const __m256i q2_1 = _mm256_set_epi32(iq3xxs_grid[q3[7]], iq3xxs_grid[q3[6]], iq3xxs_grid[q3[5]], iq3xxs_grid[q3[4]],
iq3xxs_grid[q3[3]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]);
iq3xxs_grid[q3[3]], iq3xxs_grid[q3[2]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]);
q3 += 8;
const __m256i q2_2 = _mm256_set_epi32(iq3xxs_grid[q3[7]], iq3xxs_grid[q3[6]], iq3xxs_grid[q3[5]], iq3xxs_grid[q3[4]],
iq3xxs_grid[q3[3]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]);
iq3xxs_grid[q3[3]], iq3xxs_grid[q3[2]], iq3xxs_grid[q3[1]], iq3xxs_grid[q3[0]]);
q3 += 8;
memcpy(aux32, gas, 8); gas += 8;
const __m256i s2_1 = _mm256_set_epi64x(signs64[(aux32[0] >> 21) & 127], signs64[(aux32[0] >> 14) & 127],

View file

@ -19,6 +19,7 @@ constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS = 0.0050f;
constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
constexpr float MAX_DOT_PRODUCT_ERROR_LOWBIT = 0.04f;
static const char* RESULT_STR[] = {"ok", "FAILED"};
@ -165,7 +166,9 @@ int main(int argc, char * argv[]) {
}
const float vec_dot_error = dot_product_error(qfns, test_size, test_data.data(), test_data2.data());
failed = !(vec_dot_error < MAX_DOT_PRODUCT_ERROR);
const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS ||
type == GGML_TYPE_IQ3_XXS ? MAX_DOT_PRODUCT_ERROR_LOWBIT : MAX_DOT_PRODUCT_ERROR;
failed = !(vec_dot_error < max_allowed_error);
num_failed += failed;
if (failed || verbose) {
printf("%5s dot product error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], vec_dot_error);

View file

@ -278,6 +278,8 @@ int main(int argc, char * argv[]) {
if (qfns.from_float && qfns.to_float) {
printf("%s\n", ggml_type_name(type));
ggml_quantize_init(type);
if (params.op_quantize_row_q_reference) {
printf(" quantize_row_q_reference\n");
for (size_t size : params.test_sizes) {