Remove division from matrix multiplication

This change reduces llama.com CPU cycles systemically by 2.5% according
to the Linux Kernel `perf stat -Bddd` utility.
This commit is contained in:
Justine Tunney 2023-05-10 21:19:54 -07:00
parent a88290e595
commit 1f6f9e6701
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
7 changed files with 191 additions and 70 deletions

26
libc/tinymath/magicu.h Normal file
View file

@ -0,0 +1,26 @@
#ifndef COSMOPOLITAN_LIBC_TINYMATH_MAGICU_H_
#define COSMOPOLITAN_LIBC_TINYMATH_MAGICU_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
 * Precomputed constants that let `__magicu_div` replace a division by
 * a fixed divisor with a multiplication followed by a shift.
 */
struct magicu {
  uint32_t M; /* multiplier; only the high 32 bits of `x * M` are used */
  uint32_t s; /* bits 0-5: final right shift; bit 6 (64): add `x` first */
};

/**
 * Returns the magic to pass as `d` to `__magicu_div`.
 *
 * Only the prototype is visible in this header; per the contract of
 * `__magicu_div`, `__magicu_get(y)` is the magic for computing `x / y`.
 */
struct magicu __magicu_get(uint32_t);

/**
 * Divides an unsigned integer by a divisor whose magic was precomputed.
 *
 * The work is done in 64 bits so that neither the product nor the
 * optional addition of `x` can wrap before the final shift.
 *
 * @param x is the unsigned dividend
 * @param d should be `__magicu_get(y)` when the goal is `x / y`
 * @return quotient of the unsigned integer division
 */
static inline uint32_t __magicu_div(uint32_t x, struct magicu d) {
  /* upper half of the 32x32 -> 64 bit product */
  uint64_t quotient = ((uint64_t)x * d.M) >> 32;
  /* flag bit 64 in `s` requests adding the dividend back in */
  if (d.s & 64) {
    quotient += x;
  }
  /* low six bits of `s` hold the closing shift amount */
  return (uint32_t)(quotient >> (d.s & 63));
}
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_TINYMATH_MAGICU_H_ */