From 969174e155d4bddcec7a2c6c1f4e7e4d3c293f93 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jtunney@gmail.com>
Date: Sat, 4 Sep 2021 00:58:21 -0700
Subject: [PATCH] Add APE interpreter example (#263)

---
 examples/examples.mk | 12 +++++-
 examples/loader.c    | 98 ++++++++++++++++++++++++++++++++++++++++++++
 examples/loader.lds  | 66 +++++++++++++++++++++++++++++
 libc/elf/def.h       |  6 +--
 libc/linux/execve.h  | 16 ++++++++
 libc/linux/mmap.h    | 14 +++----
 libc/str/wmemset.c   | 12 ++++--
 7 files changed, 210 insertions(+), 14 deletions(-)
 create mode 100644 examples/loader.c
 create mode 100644 examples/loader.lds
 create mode 100644 libc/linux/execve.h

diff --git a/examples/examples.mk b/examples/examples.mk
index f904ba104..006ce46f6 100644
--- a/examples/examples.mk
+++ b/examples/examples.mk
@@ -30,7 +30,8 @@ EXAMPLES_COMS =							\
 
 EXAMPLES_BINS =							\
 	$(EXAMPLES_COMS)					\
-	$(EXAMPLES_COMS:%=%.dbg)
+	$(EXAMPLES_COMS:%=%.dbg)				\
+	o/$(MODE)/examples/loader.elf
 
 EXAMPLES_DIRECTDEPS =						\
 	DSP_CORE						\
@@ -130,6 +131,15 @@ o/$(MODE)/examples/nesemu1.com.dbg:				\
 		$(APE)
 	@$(APELINK)
 
+o/$(MODE)/examples/loader.o:					\
+		OVERRIDE_CCFLAGS +=				\
+			-fno-record-gcc-switches
+
+o/$(MODE)/examples/loader.elf:					\
+		o/$(MODE)/examples/loader.o			\
+		examples/loader.lds
+	@$(ELFLINK) -s -z max-page-size=0x10
+
 $(EXAMPLES_OBJS): examples/examples.mk
 
 usr/share/dict/words: usr/share/dict/words.gz
diff --git a/examples/loader.c b/examples/loader.c
new file mode 100644
index 000000000..382c89e51
--- /dev/null
+++ b/examples/loader.c
@@ -0,0 +1,98 @@
+#if 0
+/*─────────────────────────────────────────────────────────────────╗
+│ To the extent possible under law, Justine Tunney has waived      │
+│ all copyright and related or neighboring rights to this file,    │
+│ as it is written in the following disclaimers:                   │
+│   • http://unlicense.org/                                        │
+│   • http://creativecommons.org/publicdomain/zero/1.0/            │
+╚─────────────────────────────────────────────────────────────────*/
+#endif
+#include "libc/bits/bits.h"
+#include "libc/calls/calls.h"
+#include "libc/calls/struct/stat.h"
+#include "libc/elf/def.h"
+#include "libc/elf/struct/ehdr.h"
+#include "libc/elf/struct/phdr.h"
+#include "libc/linux/close.h"
+#include "libc/linux/exit.h"
+#include "libc/linux/fstat.h"
+#include "libc/linux/mmap.h"
+#include "libc/linux/open.h"
+
+/**
+ * @fileoverview 704-byte APE executing payload for Linux, e.g.
+ *
+ *     m=tiny
+ *     make -j8 MODE=$m o/$m/examples
+ *     o/$m/examples/loader.elf o/$m/examples/printargs.com
+ *
+ * @note this can probably be used as a binfmt_misc interpreter
+ */
+
+#define O_RDONLY      0
+#define PROT_READ     1
+#define PROT_WRITE    2
+#define PROT_EXEC     4
+#define MAP_SHARED    1
+#define MAP_PRIVATE   2
+#define MAP_FIXED     16
+#define MAP_ANONYMOUS 32
+
+asm(".globl\t_start\n\t"
+    "_start:\n\t"
+    "mov\t%rsp,%rdi\n\t" /* hand the initial stack pointer to loader() as sp */
+    "jmp\tloader");       /* jump rather than call: nothing is pushed first */
+
+static noasan noubsan void spawn(long *sp, char *b) { /* b: mapped ELF image */
+  struct Elf64_Ehdr *e;
+  struct Elf64_Phdr *h;
+  e = (void *)b;
+  h = (void *)(b + e->e_phoff); /* program header table inside the image */
+  if (LinuxMmap((void *)(h[1].p_vaddr + h[1].p_filesz), /* end of file bytes */
+                h[1].p_memsz - h[1].p_filesz, PROT_READ | PROT_WRITE, /* tail */
+                MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0) > 0) {
+    sp[1] = sp[0] - 1; /* argc-1 overwrites the argv[0] slot (see loader) */
+    asm volatile("mov\t%2,%%rsp\n\t" /* stack now starts at that slot */
+                 "jmpq\t*%1"         /* enter the program at e_entry */
+                 : /* no outputs */
+                 : "D"(0), "S"((void *)e->e_entry), "d"(sp + 1)
+                 : "memory");
+    unreachable;
+  } /* falls through and returns only when the anonymous mapping failed */
+}
+
+/* Maps argv[1] at 0x400000 and jumps into it; exits with 127 on failure. */
+noasan noubsan void loader(long *sp) {
+  struct stat st;
+  int c, fd, argc = *sp;
+  char *b, *e, *p, *q, **argv = (char **)(sp + 1);
+  /* mmap errors are negative; pointers compare unsigned, so test as long */
+  if (argc > 1 && (fd = LinuxOpen(argv[1], O_RDONLY, 0)) >= 0 &&
+      !LinuxFstat(fd, &st) &&
+      (long)(b = (char *)LinuxMmap((void *)0x400000, st.st_size,
+                                   PROT_READ | PROT_WRITE | PROT_EXEC,
+                                   MAP_PRIVATE | MAP_FIXED, fd, 0)) > 0) {
+    LinuxClose(fd);
+    if (READ32LE(b) == READ32LE("\177ELF")) {
+      spawn(sp, b); /* already an ELF image: run it as mapped */
+    } else {
+      /* find a shell `printf '...'` and decode its octal escapes over the */
+      /* start of the mapping; every read stays below e, the end of file */
+      for (e = b + st.st_size, p = b; e - p >= 8; ++p) {
+        if (READ64LE(p) == READ64LE("printf '")) {
+          for (q = b, p += 8; p < e && (c = *p++) != '\'';) {
+            if (c == '\\' && p < e) {
+              c = *p++ - '0';
+              if (p < e && '0' <= *p && *p <= '7') c *= 8, c += *p++ - '0';
+              if (p < e && '0' <= *p && *p <= '7') c *= 8, c += *p++ - '0';
+            }
+            *q++ = c;
+          }
+          if (READ32LE(b) == READ32LE("\177ELF")) spawn(sp, b);
+          break;
+        }
+      }
+    }
+  }
+  LinuxExit(127);
+}
diff --git a/examples/loader.lds b/examples/loader.lds
new file mode 100644
index 000000000..6e6fb1025
--- /dev/null
+++ b/examples/loader.lds
@@ -0,0 +1,66 @@
+/*-*- mode: ld-script; indent-tabs-mode: nil; tab-width: 2; coding: utf-8   -*-│
+│vi: set et sts=2 tw=2 fenc=utf-8                                           :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+
+ENTRY(_start)
+
+SECTIONS { /* one merged output section, then debug sections at address 0 */
+
+  . = 0x200000 + SIZEOF_HEADERS; /* location counter: 0x200000 plus headers */
+
+  .text : { /* code, read-only data, data and bss share this one section */
+    *(.text .text.*)
+    *(.rodata .rodata.*)
+    *(.data .data.*)
+    *(.bss .bss.*)
+  }
+
+  .gnu_debuglink    0 : { *(.gnu_debuglink) }
+  .stab             0 : { *(.stab) }
+  .stabstr          0 : { *(.stabstr) }
+  .stab.excl        0 : { *(.stab.excl) }
+  .stab.exclstr     0 : { *(.stab.exclstr) }
+  .stab.index       0 : { *(.stab.index) }
+  .stab.indexstr    0 : { *(.stab.indexstr) }
+  .debug            0 : { *(.debug) }
+  .line             0 : { *(.line) }
+  .debug_srcinfo    0 : { *(.debug_srcinfo) }
+  .debug_sfnames    0 : { *(.debug_sfnames) }
+  .debug_aranges    0 : { *(.debug_aranges) }
+  .debug_pubnames   0 : { *(.debug_pubnames) }
+  .debug_info       0 : { *(.debug_info .gnu.linkonce.wi.*) }
+  .debug_abbrev     0 : { *(.debug_abbrev) }
+  .debug_line       0 : { *(.debug_line .debug_line.* .debug_line_end ) }
+  .debug_frame      0 : { *(.debug_frame) }
+  .debug_str        0 : { *(.debug_str) }
+  .debug_loc        0 : { *(.debug_loc) }
+  .debug_macinfo    0 : { *(.debug_macinfo) }
+  .debug_weaknames  0 : { *(.debug_weaknames) }
+  .debug_funcnames  0 : { *(.debug_funcnames) }
+  .debug_typenames  0 : { *(.debug_typenames) }
+  .debug_varnames   0 : { *(.debug_varnames) }
+  .debug_pubtypes   0 : { *(.debug_pubtypes) }
+  .debug_ranges     0 : { *(.debug_ranges) }
+  .debug_macro      0 : { *(.debug_macro) }
+  .debug_addr       0 : { *(.debug_addr) }
+  .gnu.attributes   0 : { KEEP(*(.gnu.attributes)) }
+
+  /DISCARD/ : { /* drop every remaining input section whose name starts with . */
+    *(.*)
+  }
+}
diff --git a/libc/elf/def.h b/libc/elf/def.h
index e5362ca22..b282babcb 100644
--- a/libc/elf/def.h
+++ b/libc/elf/def.h
@@ -123,9 +123,9 @@
 
 #define PN_XNUM 0xffff
 
-#define PF_X        (1 << 0)
-#define PF_W        (1 << 1)
-#define PF_R        (1 << 2)
+#define PF_X        1
+#define PF_W        2
+#define PF_R        4
 #define PF_MASKOS   0x0ff00000
 #define PF_MASKPROC 0xf0000000
 
diff --git a/libc/linux/execve.h b/libc/linux/execve.h
new file mode 100644
index 000000000..5ac2d6e7e
--- /dev/null
+++ b/libc/linux/execve.h
@@ -0,0 +1,16 @@
+#ifndef COSMOPOLITAN_LIBC_LINUX_EXECVE_H_
+#define COSMOPOLITAN_LIBC_LINUX_EXECVE_H_
+#if !(__ASSEMBLER__ + __LINKER__ + 0)
+
+forceinline long LinuxExecve(const char *program, char *const argv[],
+                             char *const envp[]) { /* direct execve syscall */
+  long rc;
+  asm volatile("syscall"
+               : "=a"(rc)
+               : "0"(59), "D"(program), "S"(argv), "d"(envp) /* rax=59 */
+               : "rcx", "r11", "memory"); /* syscall overwrites rcx and r11 */
+  return rc; /* value the kernel left in rax; errno is not touched here */
+}
+
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
+#endif /* COSMOPOLITAN_LIBC_LINUX_EXECVE_H_ */
diff --git a/libc/linux/mmap.h b/libc/linux/mmap.h
index a23fdfafe..e1ba56b63 100644
--- a/libc/linux/mmap.h
+++ b/libc/linux/mmap.h
@@ -5,14 +5,14 @@
 forceinline long LinuxMmap(void *addr, size_t size, long prot, long flags,
                            long fd, long off) {
   long rc;
-  asm volatile("mov\t%5,%%r10\n\t"
-               "mov\t%6,%%r8\n\t"
-               "mov\t%7,%%r9\n\t"
-               "syscall"
+  register long flags_ asm("r10") = flags; /* pin 4th syscall argument to r10 */
+  register long fd_ asm("r8") = fd;        /* pin 5th syscall argument to r8 */
+  register long off_ asm("r9") = off;      /* pin 6th syscall argument to r9 */
+  asm volatile("syscall"
                : "=a"(rc)
-               : "0"(9), "D"(addr), "S"(size), "d"(prot), "g"(flags), "g"(fd),
-                 "g"(off)
-               : "rcx", "r8", "r9", "r10", "r11", "memory");
+               : "0"(9), "D"(addr), "S"(size), "d"(prot), "r"(flags_), "r"(fd_),
+                 "r"(off_)
+               : "rcx", "r11", "memory"); /* r8-r10 are inputs, not clobbers */
   return rc;
 }
 
diff --git a/libc/str/wmemset.c b/libc/str/wmemset.c
index a79ecff2f..950458a16 100644
--- a/libc/str/wmemset.c
+++ b/libc/str/wmemset.c
@@ -23,9 +23,15 @@
  * @asyncsignalsafe
  */
 wchar_t *wmemset(wchar_t *p, wchar_t c, size_t n) {
-  size_t i;
-  for (i = 0; i < n; ++i) {
-    p[i] = c;
+  size_t i = 0;
+  if (n >= 4) { /* bulk path: write 16 bytes per iteration */
+    wchar_t v __attribute__((__vector_size__(16))) = {c, c, c, c}; /* NOTE(review): assumes 4-byte wchar_t - confirm */
+    do {
+      __builtin_memcpy(p + i, &v, 16); /* memcpy keeps the store alignment-safe */
+    } while ((i += 4) + 4 <= n); /* continue while 4 more elements fit */
+  }
+  while (i < n) { /* tail: whatever the bulk path left over */
+    p[i++] = c;
   }
   return p;
 }