Fix apelink shell script corruption bug

We were using a shell heredoc value '@' to terminate the dos stub, but
that's not sufficiently safe. We found out sh doesn't consider control
characters as contributing to the start of a line, and had the unlucky
chance of the linker choosing the number 2624 for e_lfanew, and that's
"@\n" in ASCII, which compromised the APE shell script.

We now use the heredoc 'justineXXXXXX' with 31 bits of entropy, which is
deterministically generated by hashing apelink inputs w/ crc32 / blake2b.
This commit is contained in:
Justine Tunney 2023-08-13 17:36:06 -07:00
parent 2366848db5
commit a033b65a33
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
3 changed files with 48 additions and 77 deletions

View file

@ -123,7 +123,7 @@ Cosmopolitan's third_party/gcc/ for source code and copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
EOF
exit 0
exit
fi
if [ "$1" = "--help" ]; then
@ -303,7 +303,7 @@ for x; do
elif [ x"$x" = x"-fsanitize=all" ] ||
[ x"$x" = x"-fsanitize=address" ] ||
[ x"$x" = x"-fsanitize=undefined" ]; then
fatal_error "$x use cosmo MODE=dbg rather than passing $x"
fatal_error "use cosmo MODE=dbg rather than passing $x"
elif [ x"$x" = x"-mno-red-zone" ]; then
# "Any memory below the stack beyond the red zone is considered
# volatile and may be modified by the operating system at any time."

View file

@ -1,47 +0,0 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/stdio/stdio.h"
#include "libc/testlib/ezbench.h"
/**
* @fileoverview Fast Modulus Using Multiplication Tutorial
*
* Expected program output:
*
* 0x23 % 3 = 2
* 0x123 % 17 = 2
* modulus l: 15𝑐 5𝑛𝑠
* fastmod l: 4𝑐 1𝑛𝑠
* precomp l: 18𝑐 6𝑛𝑠
*/
/**
 * Precomputed state for taking remainders by one fixed divisor.
 */
struct Modulus {
  uint64_t c;  // 0xFFFFFFFFFFFFFFFF / d + 1 (wraps around to 0 when d is 1)
  uint64_t d;  // the divisor itself
};

/**
 * Precomputes the multiplier used by Modulus() for divisor `d`.
 *
 * @param d is the divisor, which must be nonzero since this function
 *     divides by it
 * @return state that can be passed to Modulus() any number of times
 */
struct Modulus GetModulus(uint64_t d) {
  struct Modulus m;
  m.c = 0xFFFFFFFFFFFFFFFFull / d + 1;
  m.d = d;
  return m;
}

/**
 * Computes `x % m.d` using the state precomputed by GetModulus().
 *
 * The multiply-only path is exact only while both the dividend and the
 * divisor fit in 32 bits, because `c` carries just 64 bits of precision
 * (e.g. it would yield 0 rather than 2 for 2^63 % 3). Anything outside
 * that range is handed to the native remainder operator, so the result
 * is right for every 64-bit input.
 *
 * @param x is the dividend
 * @param m is the value returned by GetModulus() for the divisor
 * @return remainder of x divided by m.d
 */
uint64_t Modulus(uint64_t x, struct Modulus m) {
  if ((x | m.d) >> 32) {
    return x % m.d;
  }
  // the low 64 bits of c*x hold the fractional part of x/d; scaling it
  // by d and keeping the high 64 bits of that product gives x mod d
  uint64_t lowbits = m.c * x;
  return (uint64_t)(((unsigned __int128)lowbits * m.d) >> 64);
}
/**
 * Prints two sample remainders computed via Modulus(), then benchmarks
 * the native `%` operator, Modulus() with a precomputed divisor, and
 * the GetModulus() precomputation itself.
 */
int main(int argc, char *argv[]) {
  // the operand printed is the operand actually reduced, and each
  // argument is given the exact type its conversion specifier expects
  printf("%#lx %% %d = %lu\n", 0x23ul, 3,
         (unsigned long)Modulus(0x23, GetModulus(3)));
  printf("%#lx %% %d = %lu\n", 0x123ul, 17,
         (unsigned long)Modulus(0x123, GetModulus(17)));
  // volatile keeps the compiler from folding the benchmarked expressions
  volatile struct Modulus v = GetModulus(3);
  volatile uint64_t x = 23, y = 3, z;
  EZBENCH2("modulus", donothing, z = x % y);
  EZBENCH2("fastmod", donothing, z = Modulus(x, v));
  EZBENCH2("precomp", donothing, v = GetModulus(y));
  return 0;
}

View file

@ -240,6 +240,7 @@ static int macholoadcount;
static const char *outpath;
static struct Assets assets;
static struct Inputs inputs;
static char ape_heredoc[15];
static enum Strategy strategy;
static struct Loaders loaders;
static const char *custom_sh_code;
@ -1597,31 +1598,46 @@ static char *GenerateScriptIfMachine(char *p, struct Input *in) {
}
static char *FinishGeneratingDosHeader(char *p) {
p = WRITE16LE(p, 0x1000); // 10: MZ: lowers upper bound load / 16
p = WRITE16LE(p, 0xf800); // 12: MZ: roll greed on bss
p = WRITE16LE(p, 0); // 14: MZ: lower bound on stack segment
p = WRITE16LE(p, 0); // 16: MZ: initialize stack pointer
p = WRITE16LE(p, 0); // 18: MZ: ∑bₙ checksum don't bother
p = WRITE16LE(p, 0x0100); // 20: MZ: initial ip value
p = WRITE16LE(p, 0x0800); // 22: MZ: increases cs load lower bound
p = WRITE16LE(p, 0x0040); // 24: MZ: reloc table offset
p = WRITE16LE(p, 0); // 26: MZ: overlay number
p = WRITE16LE(p, 0); // 28: MZ: overlay information
p = WRITE16LE(p, 0); // 30
p = WRITE16LE(p, 0); // 32
p = WRITE16LE(p, 0); // 34
p = stpcpy(p, "JT"); // 36: Justine Tunney
p = WRITE16LE(p, 0); // 38
p = stpcpy(p, "' <<'@'\n"); // 40
p = WRITE16LE(p, 0); // 48
p = WRITE16LE(p, 0); // 50
p = WRITE16LE(p, 0); // 52
p = WRITE16LE(p, 0); // 54
p = WRITE16LE(p, 0); // 56
p = WRITE16LE(p, 0); // 58
p = WRITE32LE(p, 0); // 60: portable executable
r_off32_e_lfanew = p - 4;
return p;
p = WRITE16LE(p, 0x1000); // 10: MZ: lowers upper bound load / 16
p = WRITE16LE(p, 0xf800); // 12: MZ: roll greed on bss
p = WRITE16LE(p, 0); // 14: MZ: lower bound on stack segment
p = WRITE16LE(p, 0); // 16: MZ: initialize stack pointer
p = WRITE16LE(p, 0); // 18: MZ: ∑bₙ checksum don't bother
p = WRITE16LE(p, 0x0100); // 20: MZ: initial ip value
p = WRITE16LE(p, 0x0800); // 22: MZ: increases cs load lower bound
p = WRITE16LE(p, 0x0040); // 24: MZ: reloc table offset
p = WRITE16LE(p, 0); // 26: MZ: overlay number
p = WRITE16LE(p, 0); // 28: MZ: overlay information
p = WRITE16LE(p, 0); // 30
p = WRITE16LE(p, 0); // 32
p = WRITE16LE(p, 0); // 34
p = WRITE16LE(p, 0); // 36
p = WRITE16LE(p, 0); // 38
// terminate the shell quote started earlier in the ape magic. the big
// concern with shell script quoting is that binary content might get
// generated in the dos stub which has an ascii value that is the same
// as the end of quote. using a longer terminator reduces it to a very
// low order of probability. tacking on an unpredictable deterministic
// value makes it nearly impossible to break even with intent for that
// another terminator exists, which dates back to every version of ape
// ever released, which is "#'\"\n". programs wanting a simple way for
// scanning over the actually portable executable mz stub can use that
char *q = ape_heredoc;
q = stpcpy(q, "justine");
uint64_t w = READ64LE(hashpool);
for (int i = 0; i < 6; ++i) {
*q++ = "0123456789abcdefghijklmnopqrstuvwxyz"[w % 36];
w /= 36;
}
p = stpcpy(p, "' <<'");
p = stpcpy(p, ape_heredoc);
p = stpcpy(p, "'\n");
// here's our first unpredictable binary value, which is the offset of
// the portable executable headers.
r_off32_e_lfanew = p;
return WRITE32LE(p, 0);
}
static char *CopyMasterBootRecord(char *p) {
@ -1899,9 +1915,11 @@ int main(int argc, char *argv[]) {
if (support_vector & _HOSTMETAL) {
p = CopyMasterBootRecord(p);
}
p = stpcpy(p, "\n@\n"
"#'\"\n"
"\n");
p = stpcpy(p, "\n");
p = stpcpy(p, ape_heredoc);
p = stpcpy(p, "\n");
p = stpcpy(p, "#'\"\n"); // longstanding convention (see mz notes)
p = stpcpy(p, "\n");
if (custom_sh_code) {
p = stpcpy(p, custom_sh_code);
*p++ = '\n';