Make more ML improvements

- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
2025-07-04 02:08:30 +00:00 · 2023-05-16 08:07:23 -07:00 · 2023-05-16 08:07:23 -07:00 · e7eb0b3070
commit e7eb0b3070
parent 80db9de173
46 changed files with 340 additions and 289 deletions
--- a/libc/tinymath/acos.c
+++ b/libc/tinymath/acos.c
@@ -141,3 +141,7 @@ double acos(double x)
 	w = R(z)*s+c;
 	return 2*(df+w);
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__strong_reference(acos, acosl);
+#endif
--- a/libc/tinymath/acosf.c
+++ b/libc/tinymath/acosf.c
@@ -32,7 +32,7 @@ asm(".ident\t\"\\n\\n\
 Musl libc (MIT License)\\n\
 Copyright 2005-2014 Rich Felker, et. al.\"");
 asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+// clang-format off

 /* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */
 /*
--- a/libc/tinymath/acosh.c
+++ b/libc/tinymath/acosh.c
@@ -31,7 +31,7 @@ asm(".ident\t\"\\n\\n\
 Musl libc (MIT License)\\n\
 Copyright 2005-2014 Rich Felker, et. al.\"");
 asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+// clang-format off

 /**
 * Returns inverse hyperbolic cosine of 𝑥.
@@ -53,3 +53,7 @@ double acosh(double x)
 	/* |x| >= 0x1p26 or nan */
 	return log(x) + 0.693147180559945309417232121458176568;
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__strong_reference(acosh, acoshl);
+#endif
--- a/libc/tinymath/acoshl.c
+++ b/libc/tinymath/acoshl.c
@@ -38,6 +38,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

 asm(".ident\t\"\\n\\n\
 FreeBSD libm (BSD-2 License)\\n\
@@ -62,8 +63,6 @@ asm(".include \"libc/disclaimer.inc\"");
 #error "Unsupported long double format"
 #endif

-#define	BIAS	(LDBL_MAX_EXP - 1)
-
 static const double
 one	= 1.0;

@@ -108,3 +107,5 @@ acoshl(long double x)
 	    RETURNI(log1pl(t+sqrtl(2.0*t+t*t)));
 	}
 }
+
+#endif /* long double is long */
--- a/libc/tinymath/acosl.c
+++ b/libc/tinymath/acosl.c
@@ -28,6 +28,7 @@
 #include "libc/math.h"
 #include "libc/tinymath/invtrigl.internal.h"
 #include "libc/tinymath/ldshape.internal.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

 asm(".ident\t\"\\n\\n\
 fdlibm (fdlibm license)\\n\
@@ -54,22 +55,20 @@ asm(".include \"libc/disclaimer.inc\"");
 * Converted to long double by David Schultz <das@FreeBSD.ORG>.
 */

-/**
- * Returns arc cosine of 𝑥.
- *
- * @define atan2(fabs(sqrt((1-𝑥)*(1+𝑥))),𝑥)
- * @domain -1 ≤ 𝑥 ≤ 1
- */
-long double acosl(long double x) {
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-	return acos(x);
-#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 #if LDBL_MANT_DIG == 64
 #define CLEARBOTTOM(u) (u.i.m &= -1ULL << 32)
 #elif LDBL_MANT_DIG == 113
 #define CLEARBOTTOM(u) (u.i.lo = 0)
 #endif

+/**
+ * Returns arc cosine of 𝑥.
+ *
+ * @define atan2(fabs(sqrt((1-𝑥)*(1+𝑥))),𝑥)
+ * @domain -1 ≤ 𝑥 ≤ 1
+ */
+long double acosl(long double x)
+{
 	union ldshape u = {x};
 	long double z, s, c, f;
 	uint16_t e = u.i.se & 0x7fff;
@@ -102,8 +101,6 @@ long double acosl(long double x) {
 	f = u.f;
 	c = (z - f*f)/(s + f);
 	return 2*(__invtrigl_R(z)*s + c + f);
-
-#else
-#error "architecture unsupported"
-#endif
 }
+
+#endif /* long double is long */
--- a/libc/tinymath/asinf.c
+++ b/libc/tinymath/asinf.c
@@ -35,7 +35,7 @@ asm(".ident\t\"\\n\\n\
 Musl libc (MIT License)\\n\
 Copyright 2005-2014 Rich Felker, et. al.\"");
 asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
+// clang-format off

 /* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */
 /*
--- a/libc/tinymath/asinh.c
+++ b/libc/tinymath/asinh.c
@@ -64,3 +64,7 @@ double asinh(double x)
 	}
 	return s ? -x : x;
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__strong_reference(asinh, asinhl);
+#endif
--- a/libc/tinymath/asinhl.c
+++ b/libc/tinymath/asinhl.c
@@ -38,6 +38,7 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
 #include "libc/tinymath/freebsd.internal.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

 asm(".ident\t\"\\n\\n\
 FreeBSD libm (BSD-2 License)\\n\
@@ -65,8 +66,6 @@ asm(".include \"libc/disclaimer.inc\"");
 #error "Unsupported long double format"
 #endif

-#define	BIAS	(LDBL_MAX_EXP - 1)
-
 static const double
 one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
 huge=  1.00000000000000000000e+300;
@@ -110,3 +109,5 @@ asinhl(long double x)
 	}
 	RETURNI((hx & 0x8000) == 0 ? w : -w);
 }
+
+#endif /* long double is long */
--- a/libc/tinymath/cacos.c
+++ b/libc/tinymath/cacos.c
@@ -33,9 +33,7 @@ asm(".ident\t\"\\n\\n\
 Musl libc (MIT License)\\n\
 Copyright 2005-2014 Rich Felker, et. al.\"");
 asm(".include \"libc/disclaimer.inc\"");
-/* clang-format off */
-
-
+// clang-format off

 // FIXME: Hull et al. "Implementing the complex arcsine and arccosine functions using exception handling" 1997

--- a/libc/tinymath/catan.c
+++ b/libc/tinymath/catan.c
@@ -145,3 +145,7 @@ double complex catan(double complex z)
 	w = CMPLX(w, 0.25 * log(a));
 	return w;
 }
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+__strong_reference(catan, catanl);
+#endif
--- a/libc/tinymath/catanl.c
+++ b/libc/tinymath/catanl.c
@@ -2,32 +2,26 @@
 │vi: set et ft=c ts=8 tw=8 fenc=utf-8                                       :vi│
 ╚──────────────────────────────────────────────────────────────────────────────╝
 │                                                                              │
-│  Musl Libc                                                                   │
-│  Copyright © 2005-2014 Rich Felker, et al.                                   │
+│ OpenBSD /usr/src/lib/libm/src/s_catanl.c                                     │
 │                                                                              │
-│  Permission is hereby granted, free of charge, to any person obtaining       │
-│  a copy of this software and associated documentation files (the             │
-│  "Software"), to deal in the Software without restriction, including         │
-│  without limitation the rights to use, copy, modify, merge, publish,         │
-│  distribute, sublicense, and/or sell copies of the Software, and to          │
-│  permit persons to whom the Software is furnished to do so, subject to       │
-│  the following conditions:                                                   │
+│ Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>                    │
 │                                                                              │
-│  The above copyright notice and this permission notice shall be              │
-│  included in all copies or substantial portions of the Software.             │
+│ Permission to use, copy, modify, and distribute this software for any        │
+│ purpose with or without fee is hereby granted, provided that the above       │
+│ copyright notice and this permission notice appear in all copies.            │
 │                                                                              │
-│  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,             │
-│  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF          │
-│  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.      │
-│  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY        │
-│  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,        │
-│  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE           │
-│  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                      │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES     │
+│ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF             │
+│ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR      │
+│ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES       │
+│ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN        │
+│ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF      │
 │                                                                              │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/complex.h"
 #include "libc/math.h"
 #include "libc/tinymath/complex.internal.h"
+#if !(LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024)

 asm(".ident\t\"\\n\\n\
 OpenBSD libm (ISC License)\\n\
@@ -38,22 +32,6 @@ Copyright 2005-2014 Rich Felker, et. al.\"");
 asm(".include \"libc/disclaimer.inc\"");
 // clang-format off

-/* origin: OpenBSD /usr/src/lib/libm/src/s_catanl.c */
-/*
- * Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
 /*
 *      Complex circular arc tangent
 *
@@ -97,13 +75,6 @@ asm(".include \"libc/disclaimer.inc\"");
 * 2.9e-17.  See also clog().
 */

-
-#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
-long double complex catanl(long double complex z)
-{
-	return catan(z);
-}
-#else
 static const long double PIL = 3.141592653589793238462643383279502884197169L;
 static const long double DP1 = 3.14159265358979323829596852490908531763125L;
 static const long double DP2 = 1.6667485837041756656403424829301998703007e-19L;
@@ -149,4 +120,4 @@ long double complex catanl(long double complex z)
 	return w;
 }

-#endif
+#endif /* long double is long */
--- a/libc/tinymath/fmin.c
+++ b/libc/tinymath/fmin.c
@@ -26,10 +26,10 @@
 * signed zeroes.
 */
 double fmin(double x, double y) {
-  if (__builtin_isnan(x)) return y;
-  if (__builtin_isnan(y)) return x;
-  if (__builtin_signbit(x) != __builtin_signbit(y)) {
-    return __builtin_signbit(x) ? x : y; /* C99 Annex F.9.9.2 */
+  if (isnan(x)) return y;
+  if (isnan(y)) return x;
+  if (signbit(x) != signbit(y)) {
+    return signbit(x) ? x : y; /* C99 Annex F.9.9.2 */
  }
  return x < y ? x : y;
 }
--- a/libc/tinymath/fminf.c
+++ b/libc/tinymath/fminf.c
@@ -17,6 +17,7 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/math.h"
+#include "libc/tinymath/freebsd.internal.h"

 /**
 * Returns minimum of two floats.
@@ -26,10 +27,10 @@
 * signed zeroes.
 */
 float fminf(float x, float y) {
-  if (__builtin_isnan(x)) return y;
-  if (__builtin_isnan(y)) return x;
-  if (__builtin_signbitf(x) != __builtin_signbitf(y)) {
-    return __builtin_signbitf(x) ? x : y; /* C99 Annex F.9.9.2 */
+  if (isnan(x)) return y;
+  if (isnan(y)) return x;
+  if (signbit(x) != signbit(y)) {
+    return signbit(x) ? x : y; /* C99 Annex F.9.9.2 */
  }
  return x < y ? x : y;
 }
--- a/libc/tinymath/fminl.c
+++ b/libc/tinymath/fminl.c
@@ -27,10 +27,10 @@
 * signed zeroes.
 */
 long double fminl(long double x, long double y) {
-  if (__builtin_isnan(x)) return y;
-  if (__builtin_isnan(y)) return x;
-  if (__builtin_signbitl(x) != __builtin_signbitl(y)) {
-    return __builtin_signbitl(x) ? x : y; /* C99 Annex F.9.9.2 */
+  if (isnan(x)) return y;
+  if (isnan(y)) return x;
+  if (signbit(x) != signbit(y)) {
+    return signbit(x) ? x : y; /* C99 Annex F.9.9.2 */
  }
  return x < y ? x : y;
 }
--- a/libc/tinymath/fsumf.c
+++ b/libc/tinymath/fsumf.c
@@ -22,8 +22,8 @@
 /**
 * Adds floats in array.
 */
-float fsumf(const float *p, size_t n) {
-  float s;
+double fsumf(const float *p, size_t n) {
+  double s;
  size_t i;
  if (n > 8) return fsumf(p, n / 2) + fsumf(p + n / 2, n - n / 2);
  for (s = i = 0; i < n; ++i) s += p[i];