From f491276b62d3fc81e73eee7a93ca1bdf7a1b34aa Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sat, 12 Aug 2023 07:45:32 -0700
Subject: [PATCH] Add support for C++ thread safe statics

---
 Makefile                 |   2 +-
 bin/cosmocc              |   4 +-
 build/definitions.mk     |   1 -
 libc/runtime/cxaguard.c  | 114 +++++++++++++++++++++++++++++++++++++++
 libc/runtime/runtime.mk  |   1 +
 tool/scripts/fat-aarch64 |   2 +-
 tool/scripts/fat-x86_64  |   2 +-
 7 files changed, 120 insertions(+), 6 deletions(-)
 create mode 100644 libc/runtime/cxaguard.c

diff --git a/Makefile b/Makefile
index ebbb2f1a3..f0ce482c4 100644
--- a/Makefile
+++ b/Makefile
@@ -137,9 +137,9 @@ include ape/ape.mk # │
 include libc/fmt/fmt.mk # │
 include libc/vga/vga.mk #─┘
 include libc/calls/calls.mk #─┐
+include third_party/nsync/nsync.mk # │
 include libc/runtime/runtime.mk # ├──SYSTEMS RUNTIME
 include libc/crt/crt.mk # │ You can issue system calls
-include third_party/nsync/nsync.mk # │
 include third_party/dlmalloc/dlmalloc.mk #─┘
 include libc/mem/mem.mk #─┐
 include third_party/gdtoa/gdtoa.mk # ├──DYNAMIC RUNTIME
diff --git a/bin/cosmocc b/bin/cosmocc
index 5234f328f..5f4b801b0 100755
--- a/bin/cosmocc
+++ b/bin/cosmocc
@@ -82,7 +82,7 @@ CC="$COSMO/o/third_party/gcc/bin/x86_64-linux-musl-gcc"
 ORIGINAL="$0 $*"
 PLATFORM="-D__COSMOPOLITAN__"
 PREDEF="-include libc/integral/normalize.inc"
-CCFLAGS="-fdata-sections -ffunction-sections -fno-pie -mno-tls-direct-seg-refs -mno-red-zone -fportcosmo"
+CCFLAGS="-fno-pie -mno-tls-direct-seg-refs -mno-red-zone -fportcosmo"
 CPPFLAGS="-nostdinc -iquote $COSMO -isystem $COSMOS/include -isystem $COSMO/libc/isystem"
 LDFLAGS="-static -no-pie -nostdlib -fuse-ld=bfd -Wl,-melf_x86_64"
 APEFLAGS="-L$COSMOS/lib -Wl,--gc-sections -Wl,-T,$COSMO/o/$MODE/ape/public/ape.lds $COSMO/o/$MODE/ape/ape-no-modify-self.o $COSMO/o/$MODE/libc/crt/crt.o"
@@ -90,7 +90,7 @@ LDLIBS="$COSMO/o/$MODE/cosmopolitan.a"
 
 if [ x"$0" != x"${0%++}" ]; then
   CC="$COSMO/o/third_party/gcc/bin/x86_64-linux-musl-g++"
-  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit -fno-threadsafe-statics"
+  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit"
   LDLIBS="$COSMO/o/$MODE/third_party/libcxx/libcxx.a $LDLIBS"
 fi
 
diff --git a/build/definitions.mk b/build/definitions.mk
index 1306edcf2..25bcf1e96 100644
--- a/build/definitions.mk
+++ b/build/definitions.mk
@@ -231,7 +231,6 @@ DEFAULT_CXXFLAGS = \
 	-fno-rtti \
 	-fno-exceptions \
 	-fuse-cxa-atexit \
-	-fno-threadsafe-statics \
 	-Wno-int-in-bool-context \
 	-Wno-narrowing \
 	-Wno-literal-suffix
diff --git a/libc/runtime/cxaguard.c b/libc/runtime/cxaguard.c
new file mode 100644
index 000000000..e18a1beda
--- /dev/null
+++ b/libc/runtime/cxaguard.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2006 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "libc/atomic.h"
+#include "libc/intrin/atomic.h"
+#include "libc/limits.h"
+#include "third_party/nsync/futex.internal.h"
+
+// This file contains the C++ ABI support functions for one-time
+// construction, as defined in section 4.4.2 of the "Run-time ABI for
+// the ARM Architecture".
+//
+// The ARM and Itanium/x86 C++ ABIs define one-time construction differently:
+//
+//   - ARM: compiler-generated code tests the LSB of the guard variable
+//     before calling __cxa_guard_acquire et al.; guards are 32-bit aligned.
+//   - Itanium/x86: the low-order _byte_ is tested instead; guards are
+//     64-bit aligned.
+//
+// Reference documentation:
+//
+//   - section 3.2.3 of ARM IHI 0041C (for ARM)
+//   - section 3.3.2 of the Itanium C++ ABI specification v1.83 (for x86)
+//
+// No C++ ABI is available for other architectures, but the gcc source
+// shows that they all follow the Itanium/x86 definition.
+
+// The Itanium/x86 C++ ABI (used by all architectures that aren't ARM32)
+// mandates that guard variables are 64-bit aligned, 64-bit values. The
+// least significant byte is tested by the compiler-generated code before
+// it calls __cxa_guard_acquire.
+union CxaGuardValue {
+  atomic_int state;
+  int64_t aligner;
+};
+
+// Construction state values, chosen according to the reference
+// documentation. 0 is the initialization value. ARM requires
+// ((*gv & 1) == 1) after __cxa_guard_release and ((*gv & 3) == 0) after
+// __cxa_guard_abort. x86 requires that __cxa_guard_acquire not modify the
+// first byte, and that the first byte be non-zero after __cxa_guard_release.
+#define CONSTRUCTION_NOT_YET_STARTED 0
+#define CONSTRUCTION_COMPLETE 1
+#define CONSTRUCTION_UNDERWAY_WITHOUT_WAITER 0x100
+#define CONSTRUCTION_UNDERWAY_WITH_WAITER 0x200
+
+// Claims the guard for one-time construction, waiting while a construction
+// is underway. Returns 1 if the caller claimed it (and is expected to call
+// __cxa_guard_release or __cxa_guard_abort), or 0 if already complete.
+int __cxa_guard_acquire(union CxaGuardValue *gv) {
+  int old_value = atomic_load_explicit(&gv->state, memory_order_relaxed);
+  while (true) {
+    if (old_value == CONSTRUCTION_COMPLETE) {
+      // An acquire fence is needed before returning in the COMPLETE
+      // state, as we must ensure that all the stores performed by the
+      // construction function are observable on this CPU after we exit.
+      atomic_thread_fence(memory_order_acquire);
+      return 0;
+    } else if (old_value == CONSTRUCTION_NOT_YET_STARTED) {
+      if (!atomic_compare_exchange_weak_explicit(
+              &gv->state, &old_value, CONSTRUCTION_UNDERWAY_WITHOUT_WAITER,
+              memory_order_relaxed, memory_order_relaxed)) {
+        continue;
+      }
+      // The acquire fence may not be needed. But as described in section 3.3.2
+      // of the Itanium C++ ABI specification, it probably has to behave like
+      // the acquisition of a mutex, which needs an acquire fence.
+      atomic_thread_fence(memory_order_acquire);
+      return 1;
+    } else if (old_value == CONSTRUCTION_UNDERWAY_WITHOUT_WAITER) {
+      if (!atomic_compare_exchange_weak_explicit(
+              &gv->state, &old_value, CONSTRUCTION_UNDERWAY_WITH_WAITER,
+              memory_order_relaxed, memory_order_relaxed)) {
+        continue;
+      }
+    }
+    nsync_futex_wait_(&gv->state, CONSTRUCTION_UNDERWAY_WITH_WAITER, 0, 0);
+    old_value = atomic_load_explicit(&gv->state, memory_order_relaxed);
+  }
+}
+
+// Stores CONSTRUCTION_COMPLETE and wakes waiters if any had registered.
+void __cxa_guard_release(union CxaGuardValue *gv) {
+  // Release ordering on the exchange makes all stores performed by the
+  // construction function visible in other threads.
+  int old_value = atomic_exchange_explicit(&gv->state, CONSTRUCTION_COMPLETE,
+                                           memory_order_release);
+  if (old_value == CONSTRUCTION_UNDERWAY_WITH_WAITER) {
+    nsync_futex_wake_(&gv->state, INT_MAX, 0);
+  }
+}
+
+// Stores CONSTRUCTION_NOT_YET_STARTED and wakes waiters if any had registered.
+void __cxa_guard_abort(union CxaGuardValue *gv) {
+  // Release ordering on the exchange makes all stores performed by the
+  // construction function visible in other threads.
+  int old_value = atomic_exchange_explicit(
+      &gv->state, CONSTRUCTION_NOT_YET_STARTED, memory_order_release);
+  if (old_value == CONSTRUCTION_UNDERWAY_WITH_WAITER) {
+    nsync_futex_wake_(&gv->state, INT_MAX, 0);
+  }
+}
diff --git a/libc/runtime/runtime.mk b/libc/runtime/runtime.mk
index 69679c437..d5317e4d1 100644
--- a/libc/runtime/runtime.mk
+++ b/libc/runtime/runtime.mk
@@ -45,6 +45,7 @@ LIBC_RUNTIME_A_DIRECTDEPS = \
 	LIBC_STR \
 	LIBC_SYSV \
 	LIBC_SYSV_CALLS \
+	THIRD_PARTY_NSYNC \
 	THIRD_PARTY_PUFF \
 	THIRD_PARTY_XED
 
diff --git a/tool/scripts/fat-aarch64 b/tool/scripts/fat-aarch64
index ed8b5d8ab..082861a26 100755
--- a/tool/scripts/fat-aarch64
+++ b/tool/scripts/fat-aarch64
@@ -9,7 +9,7 @@ LDLIBS="$COSMO/o/$MODE/cosmopolitan.a"
 
 if [ x"$PROG" != x"${PROG%++}" ]; then
   CC="$COSMO/o/third_party/gcc/bin/aarch64-linux-musl-g++"
-  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit -fno-threadsafe-statics"
+  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit"
   LDLIBS="$COSMO/o/$MODE/third_party/libcxx/libcxx.a $LDLIBS"
 fi
 
diff --git a/tool/scripts/fat-x86_64 b/tool/scripts/fat-x86_64
index c5996c8a8..0f36e0c17 100755
--- a/tool/scripts/fat-x86_64
+++ b/tool/scripts/fat-x86_64
@@ -9,7 +9,7 @@ LDLIBS="$COSMO/o/$MODE/cosmopolitan.a"
 
 if [ x"$PROG" != x"${PROG%++}" ]; then
   CC="$COSMO/o/third_party/gcc/bin/x86_64-linux-musl-g++"
-  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit -fno-threadsafe-statics"
+  CCFLAGS="$CCFLAGS -fno-rtti -fno-exceptions -fuse-cxa-atexit"
   LDLIBS="$COSMO/o/$MODE/third_party/libcxx/libcxx.a $LDLIBS"
 fi
 