From ad11fc32ad0ad9048b4b39f49b5ef682447cff81 Mon Sep 17 00:00:00 2001
From: Justine Tunney <jtunney@gmail.com>
Date: Mon, 7 Oct 2024 18:39:25 -0700
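Subject: [PATCH] Avoid an --ftrace crash on Windows

kloghandle() now opens the log file with kNtFileFlagOverlapped, and
klog() issues its WriteFile() call with an NtOverlapped structure backed
by a temporary event, waits for the result using GetOverlappedResult(),
and restores the caller's last-error value afterwards. WinThreadEntry()
now calls __imp_GetCurrentThreadId directly rather than going through
GetCurrentThreadId(). CheckLargeStackAllocation() now performs its page
touching stores through a volatile pointer.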
---
 libc/intrin/kprintf.greg.c | 25 +++++++++++++++++++------
 libc/runtime/clone.c       |  5 +++--
 libc/runtime/stack.h       |  5 ++---
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c
index ee9f6a539..1654038fe 100644
--- a/libc/intrin/kprintf.greg.c
+++ b/libc/intrin/kprintf.greg.c
@@ -40,9 +40,11 @@
 #include "libc/nt/enum/fileflagandattributes.h"
 #include "libc/nt/enum/filesharemode.h"
 #include "libc/nt/errors.h"
+#include "libc/nt/events.h"
 #include "libc/nt/files.h"
 #include "libc/nt/process.h"
 #include "libc/nt/runtime.h"
+#include "libc/nt/struct/overlapped.h"
 #include "libc/nt/thunk/msabi.h"
 #include "libc/runtime/internal.h"
 #include "libc/runtime/memtrack.internal.h"
@@ -113,10 +115,13 @@
   }
 
 // clang-format off
+__msabi extern typeof(CloseHandle) *const __imp_CloseHandle;
+__msabi extern typeof(CreateEvent) *const __imp_CreateEventW;
 __msabi extern typeof(CreateFile) *const __imp_CreateFileW;
 __msabi extern typeof(DuplicateHandle) *const __imp_DuplicateHandle;
 __msabi extern typeof(GetEnvironmentVariable) *const __imp_GetEnvironmentVariableW;
 __msabi extern typeof(GetLastError) *const __imp_GetLastError;
+__msabi extern typeof(GetOverlappedResult) *const __imp_GetOverlappedResult;
 __msabi extern typeof(GetStdHandle) *const __imp_GetStdHandle;
 __msabi extern typeof(SetLastError) *const __imp_SetLastError;
 __msabi extern typeof(WriteFile) *const __imp_WriteFile;
@@ -283,7 +288,7 @@ privileged long kloghandle(void) {
         hand = __imp_CreateFileW(
             path, kNtFileAppendData,
             kNtFileShareRead | kNtFileShareWrite | kNtFileShareDelete, 0,
-            kNtOpenAlways, kNtFileAttributeNormal, 0);
+            kNtOpenAlways, kNtFileAttributeNormal | kNtFileFlagOverlapped, 0);
       } else {
         hand = -1;  // KPRINTF_LOG was empty string or too long
       }
@@ -359,7 +364,6 @@ privileged void _klog_serial(const char *b, size_t n) {
 
 privileged void klog(const char *b, size_t n) {
 #ifdef __x86_64__
-  int e;
   long h;
   uint32_t wrote;
   long rax, rdi, rsi, rdx;
@@ -367,11 +371,24 @@ privileged void klog(const char *b, size_t n) {
     return;
   }
   if (IsWindows()) {
-    e = __imp_GetLastError();
-    if (!__imp_WriteFile(h, b, n, &wrote, 0)) {
-      __imp_SetLastError(e);
-      __klog_handle = 0;
+    bool32 ok;
+    intptr_t ev;
+    // logging must not disturb the caller's last-error value
+    int e = __imp_GetLastError();
+    if ((ev = __imp_CreateEventW(0, 0, 0, 0))) {
+      struct NtOverlapped overlap = {.hEvent = ev};
+      ok = !!__imp_WriteFile(h, b, n, 0, &overlap);
+      if (!ok && __imp_GetLastError() == kNtErrorIoPending)
+        ok = true;
+      // only wait if the write completed or is pending; when WriteFile
+      // fails outright there is no operation left to signal our event,
+      // so a blocking GetOverlappedResult() could wait forever
+      ok = ok && !!__imp_GetOverlappedResult(h, &overlap, &wrote, true);
+      if (!ok)
+        __klog_handle = 0;
+      __imp_CloseHandle(ev);
     }
+    __imp_SetLastError(e);
   } else if (IsMetal()) {
     if (_weaken(_klog_vga)) {
       _weaken(_klog_vga)(b, n);
diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c
index 3f2f822dd..d7cc911c3 100644
--- a/libc/runtime/clone.c
+++ b/libc/runtime/clone.c
@@ -106,8 +106,9 @@ static long AlignStack(long sp, char *stk, long stksz, int mal) {
 ////////////////////////////////////////////////////////////////////////////////
 // THE NEW TECHNOLOGY
 
-__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
 __msabi extern typeof(ExitThread) *const __imp_ExitThread;
+__msabi extern typeof(GetCurrentThreadId) *const __imp_GetCurrentThreadId;
+__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
 __msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
 
 static textwindows dontinstrument wontreturn void  //
@@ -118,7 +119,7 @@ WinThreadEntry(int rdi,                            // rcx
   int rc;
   if (wt->tls)
     __set_tls_win32(wt->tls);
-  *wt->ctid = GetCurrentThreadId();
+  *wt->ctid = __imp_GetCurrentThreadId();
   rc = __stack_call(wt->arg, wt->tid, 0, 0, wt->func, wt->sp);
   // we can now clear ctid directly since we're no longer using our own
   // stack memory, which can now be safely free'd by the parent thread.
diff --git a/libc/runtime/stack.h b/libc/runtime/stack.h
index 87dcd2440..d526bb3da 100644
--- a/libc/runtime/stack.h
+++ b/libc/runtime/stack.h
@@ -69,9 +69,8 @@ uintptr_t GetStackBottom(void) pureconst;
  * will also trigger the stack to grow down safely.
  */
 forceinline void CheckLargeStackAllocation(void *p, ssize_t n) {
-  for (; n > 0; n -= 4096) {
-    ((char *)p)[n - 1] = 0;
-  }
+  for (; n > 0; n -= 4096)
+    ((volatile char *)p)[n - 1] = 0;
 }
 
 void *NewCosmoStack(void) vallocesque;