Use Kahan summation for fsum and fsumf

This compensated summation gives a more precise result than the original
algorithm, as demonstrated through extra precision in the tests. It also
drops recursion and the extra branching.

I used double precision internally for fsumf since I assume its purpose
is to interface with single precision, not necessary constrain itself to
only using single precision. If needed, it can be trivially changed to
use single precision internally while still passing the tests.

In fsumf, naive summation using double precision is likely sufficient,
and more precise than single precision Kaham summation. In other words,
Kahan summation with double precision in fsumf may be overkill.

In the fsumf test, neither 10000.1 nor 1.1 can be represented exactly.
The former is rounded down and the latter is rounded up (by less) to the
nearest representable value. As a result, the most precise sum is just
shy of the "obvious" result.
This commit is contained in:
Christopher Wellons 2023-02-14 14:02:12 -05:00
parent 2b6261a52d
commit 35924d415e
3 changed files with 24 additions and 12 deletions

View file

@ -24,8 +24,14 @@
*/ */
double fsum(const double *p, size_t n) { double fsum(const double *p, size_t n) {
size_t i; size_t i;
double s; double err, sum, t, y;
if (n > 8) return fsum(p, n / 2) + fsum(p + n / 2, n - n / 2);
for (s = i = 0; i < n; ++i) s += p[i]; sum = err = 0;
return s; for (i = 0; i < n; ++i) {
y = p[i] - err;
t = sum + y;
err = (t - sum) - y;
sum = t;
}
return sum;
} }

View file

@ -23,9 +23,15 @@
* Adds floats in array. * Adds floats in array.
*/ */
float fsumf(const float *p, size_t n) { float fsumf(const float *p, size_t n) {
float s;
size_t i; size_t i;
if (n > 8) return fsumf(p, n / 2) + fsumf(p + n / 2, n - n / 2); double err, sum, t, y;
for (s = i = 0; i < n; ++i) s += p[i];
return s; sum = err = 0;
for (i = 0; i < n; ++i) {
y = p[i] - err;
t = sum + y;
err = (t - sum) - y;
sum = t;
}
return sum;
} }

View file

@ -31,21 +31,21 @@ double D[N];
void SetUp(void) { void SetUp(void) {
int i; int i;
for (i = 0; i < N / 2; ++i) { for (i = 0; i < N / 2; ++i) {
D[i * 2 + 0] = 1000000000.1; D[i * 2 + 0] = 10000000000.1;
D[i * 2 + 1] = 1.1; D[i * 2 + 1] = 1.1;
} }
for (i = 0; i < N / 2; ++i) { for (i = 0; i < N / 2; ++i) {
F[i * 2 + 0] = 1000.1; F[i * 2 + 0] = 10000.1;
F[i * 2 + 1] = 1.1; F[i * 2 + 1] = 1.1;
} }
} }
TEST(fsum, test) { TEST(fsum, test) {
EXPECT_STREQ("500000000.6", _gc(xasprintf("%.15g", fsum(D, N) / N))); EXPECT_STREQ("5000000000.6", _gc(xasprintf("%.16g", fsum(D, N) / N)));
} }
TEST(fsumf, test) { TEST(fsumf, test) {
EXPECT_STREQ("500.6", _gc(xasprintf("%.7g", fsumf(F, N) / N))); EXPECT_STREQ("5000.5996", _gc(xasprintf("%.8g", fsumf(F, N) / N)));
} }
BENCH(fsum, bench) { BENCH(fsum, bench) {