mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-14 06:59:10 +00:00
Make numerous improvements
- Python static hello world now 1.8mb
- Python static fully loaded now 10mb
- Python HTTPS client now uses MbedTLS
- Python REPL now completes import stmts
- Increase stack size for Python for now
- Begin synthesizing posixpath and ntpath
- Restore Python \N{UNICODE NAME} support
- Restore Python NFKD symbol normalization
- Add optimized code path for Intel SHA-NI
- Get more Python unit tests passing faster
- Get Python help() pagination working on NT
- Python hashlib now supports MbedTLS PBKDF2
- Make memcpy/memmove/memcmp/bcmp/etc. faster
- Add Mersenne Twister and Vigna to LIBC_RAND
- Provide privileged __printf() for error code
- Fix zipos opendir() so that it reports ENOTDIR
- Add basic chmod() implementation for Windows NT
- Add Cosmo's best functions to Python cosmo module
- Pin function trace indent depth to that of caller
- Show memory diagram on invalid access in MODE=dbg
- Differentiate stack overflow on crash in MODE=dbg
- Add stb_truetype and tools for analyzing font files
- Upgrade to UNICODE 13 and reduce its binary footprint
- COMPILE.COM now logs resource usage of build commands
- Start implementing basic poll() support on bare metal
- Set getauxval(AT_EXECFN) to GetModuleFileName() on NT
- Add descriptions to strerror() in non-TINY build modes
- Add COUNTBRANCH() macro to help with micro-optimizations
- Make error / backtrace / asan / memory code more unbreakable
- Add fast perfect C implementation of μ-Law and a-Law audio codecs
- Make strtol() functions consistent with other libc implementations
- Improve Linenoise implementation (see also github.com/jart/bestline)
- COMPILE.COM now suppresses stdout/stderr of successful build commands
This commit is contained in:
parent
fa7b4f5bd1
commit
39bf41f4eb
806 changed files with 77494 additions and 63859 deletions
|
@ -41,10 +41,66 @@ libmpdec (BSD-2)\\n\
|
|||
Copyright 2008-2016 Stefan Krah\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
|
||||
/*
|
||||
Cache Efficient Matrix Fourier Transform
|
||||
for arrays of form 2ⁿ
|
||||
|
||||
/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the
|
||||
form 2**n (See literature/six-step.txt). */
|
||||
|
||||
The Six Step Transform
|
||||
══════════════════════
|
||||
|
||||
In libmpdec, the six-step transform is the Matrix Fourier Transform in
|
||||
disguise. It is called six-step transform after a variant that appears
|
||||
in [1]. The algorithm requires that the input array can be viewed as an
|
||||
R×C matrix.
|
||||
|
||||
|
||||
Algorithm six-step (forward transform)
|
||||
──────────────────────────────────────
|
||||
|
||||
1a) Transpose the matrix.
|
||||
|
||||
1b) Apply a length R FNT to each row.
|
||||
|
||||
1c) Transpose the matrix.
|
||||
|
||||
2) Multiply each matrix element (addressed by j×C+m) by r**(j×m).
|
||||
|
||||
3) Apply a length C FNT to each row.
|
||||
|
||||
4) Transpose the matrix.
|
||||
|
||||
Note that steps 1a) - 1c) are exactly equivalent to step 1) of the Matrix
|
||||
Fourier Transform. For large R, it is faster to transpose twice and do
|
||||
a transform on the rows than to perform a column transpose directly.
|
||||
|
||||
|
||||
Algorithm six-step (inverse transform)
|
||||
──────────────────────────────────────
|
||||
|
||||
0) View the matrix as a C×R matrix.
|
||||
|
||||
1) Transpose the matrix, producing an R×C matrix.
|
||||
|
||||
2) Apply a length C FNT to each row.
|
||||
|
||||
3) Multiply each matrix element (addressed by i×C+n) by r**(i×n).
|
||||
|
||||
4a) Transpose the matrix.
|
||||
|
||||
4b) Apply a length R FNT to each row.
|
||||
|
||||
4c) Transpose the matrix.
|
||||
|
||||
Again, steps 4a) - 4c) are equivalent to step 4) of the Matrix Fourier
|
||||
Transform.
|
||||
|
||||
|
||||
──
|
||||
|
||||
[1] David H. Bailey: FFTs in External or Hierarchical Memory
|
||||
http://crd.lbl.gov/~dhbailey/dhbpapers/
|
||||
*/
|
||||
|
||||
/* forward transform with sign = -1 */
|
||||
int
|
||||
|
@ -54,28 +110,18 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
mpd_size_t log2n, C, R;
|
||||
mpd_uint_t kernel;
|
||||
mpd_uint_t umod;
|
||||
#ifdef PPRO
|
||||
double dmod;
|
||||
uint32_t dinvmod[3];
|
||||
#endif
|
||||
mpd_uint_t *x, w0, w1, wstep;
|
||||
mpd_size_t i, k;
|
||||
|
||||
|
||||
assert(ispower2(n));
|
||||
assert(n >= 16);
|
||||
assert(n <= MPD_MAXTRANSFORM_2N);
|
||||
|
||||
log2n = mpd_bsr(n);
|
||||
C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
|
||||
R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
|
||||
|
||||
|
||||
/* Transpose the matrix. */
|
||||
if (!transpose_pow2(a, R, C)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Length R transform on the rows. */
|
||||
if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) {
|
||||
return 0;
|
||||
|
@ -83,13 +129,11 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
for (x = a; x < a+n; x += R) {
|
||||
fnt_dif2(x, R, tparams);
|
||||
}
|
||||
|
||||
/* Transpose the matrix. */
|
||||
if (!transpose_pow2(a, C, R)) {
|
||||
mpd_free(tparams);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
|
||||
SETMODULUS(modnum);
|
||||
kernel = _mpd_getkernel(n, -1, modnum);
|
||||
|
@ -106,7 +150,6 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
a[i*C+k+1] = x1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Length C transform on the rows. */
|
||||
if (C != R) {
|
||||
mpd_free(tparams);
|
||||
|
@ -118,7 +161,6 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
fnt_dif2(x, C, tparams);
|
||||
}
|
||||
mpd_free(tparams);
|
||||
|
||||
#if 0
|
||||
/* An unordered transform is sufficient for convolution. */
|
||||
/* Transpose the matrix. */
|
||||
|
@ -126,11 +168,9 @@ six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* inverse transform with sign = 1 */
|
||||
int
|
||||
inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
||||
|
@ -139,23 +179,14 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
mpd_size_t log2n, C, R;
|
||||
mpd_uint_t kernel;
|
||||
mpd_uint_t umod;
|
||||
#ifdef PPRO
|
||||
double dmod;
|
||||
uint32_t dinvmod[3];
|
||||
#endif
|
||||
mpd_uint_t *x, w0, w1, wstep;
|
||||
mpd_size_t i, k;
|
||||
|
||||
|
||||
assert(ispower2(n));
|
||||
assert(n >= 16);
|
||||
assert(n <= MPD_MAXTRANSFORM_2N);
|
||||
|
||||
log2n = mpd_bsr(n);
|
||||
C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */
|
||||
R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */
|
||||
|
||||
|
||||
#if 0
|
||||
/* An unordered transform is sufficient for convolution. */
|
||||
/* Transpose the matrix, producing an R*C matrix. */
|
||||
|
@ -163,7 +194,6 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Length C transform on the rows. */
|
||||
if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) {
|
||||
return 0;
|
||||
|
@ -171,7 +201,6 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
for (x = a; x < a+n; x += C) {
|
||||
fnt_dif2(x, C, tparams);
|
||||
}
|
||||
|
||||
/* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */
|
||||
SETMODULUS(modnum);
|
||||
kernel = _mpd_getkernel(n, 1, modnum);
|
||||
|
@ -188,13 +217,11 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
a[i*C+k+1] = x1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Transpose the matrix. */
|
||||
if (!transpose_pow2(a, R, C)) {
|
||||
mpd_free(tparams);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Length R transform on the rows. */
|
||||
if (R != C) {
|
||||
mpd_free(tparams);
|
||||
|
@ -206,11 +233,9 @@ inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum)
|
|||
fnt_dif2(x, R, tparams);
|
||||
}
|
||||
mpd_free(tparams);
|
||||
|
||||
/* Transpose the matrix. */
|
||||
if (!transpose_pow2(a, C, R)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue