Make more ML improvements

- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
This commit is contained in:
Justine Tunney 2023-05-16 08:07:23 -07:00
parent 80db9de173
commit e7eb0b3070
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
46 changed files with 340 additions and 289 deletions

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/math.h"
#include "libc/tinymath/freebsd.internal.h"
/**
* Returns minimum of two floats.
@ -26,10 +27,10 @@
* signed zeroes.
*/
float fminf(float x, float y) {
  /* A NaN operand is treated as missing: the other operand is returned.
     If both are NaN, the result is NaN (y is returned from the first test). */
  if (isnan(x)) return y;
  if (isnan(y)) return x;
  /* When the sign bits differ, pick the operand whose sign bit is set.
     This makes -0.0f rank below +0.0f, which `<` alone cannot do since
     the two zeroes compare equal. */
  if (signbit(x) != signbit(y)) {
    return signbit(x) ? x : y; /* C99 Annex F.9.9.2 */
  }
  /* Same sign and neither is NaN: ordinary comparison is sufficient. */
  return x < y ? x : y;
}