From 4d629fd424993b389fdfaa8578b5087b86a0c049 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 8 Jun 2023 06:44:54 -0700 Subject: [PATCH] Fix stack abuse in llama.cc This change also incorporates improvements for MODE=asan. It's been confirmed that o/asan/third_party/ggml/llama.com will work. Fixes #829 --- build/config.mk | 6 +++--- build/definitions.mk | 15 ++++++------- libc/intrin/stpcpy.c | 36 ++++++++++++++----------------- libc/intrin/strcmp.c | 8 +++---- libc/intrin/strcpy.c | 37 +++++++++++++++----------------- libc/str/strcasecmp.c | 10 ++------- test/libc/release/test.mk | 4 ++++ third_party/chibicc/chibicc.mk | 4 ++++ third_party/chibicc/test/test.mk | 4 ++++ third_party/ggml/ggml.mk | 1 + third_party/ggml/llama.cc | 18 +++++++++------- third_party/sqlite3/sqlite3.mk | 6 +++--- 12 files changed, 73 insertions(+), 76 deletions(-) diff --git a/build/config.mk b/build/config.mk index 427f25195..0a8d34249 100644 --- a/build/config.mk +++ b/build/config.mk @@ -33,7 +33,6 @@ endif ifeq ($(MODE), zero) OVERRIDE_CFLAGS += -O0 OVERRIDE_CXXFLAGS += -O0 -OVERRIDE_CCFLAGS = -fno-omit-frame-pointer CONFIG_CPPFLAGS += -DSYSDEBUG endif ifeq ($(MODE), aarch64-zero) @@ -132,6 +131,7 @@ ifeq ($(MODE), asan) CONFIG_CCFLAGS += $(BACKTRACES) -O2 -DSYSDEBUG CONFIG_COPTS += -fsanitize=address TARGET_ARCH ?= -msse3 +QUOTA ?= -C64 -L300 endif # Debug Mode @@ -151,12 +151,14 @@ CONFIG_CCFLAGS += $(BACKTRACES) -DSYSDEBUG -O0 -fno-inline CONFIG_COPTS += -fsanitize=address -fsanitize=undefined TARGET_ARCH ?= -msse3 OVERRIDE_CCFLAGS += -fno-pie +QUOTA ?= -C64 -L300 endif ifeq ($(MODE), aarch64-dbg) ENABLE_FTRACE = 1 CONFIG_CPPFLAGS += -DMODE_DBG CONFIG_CCFLAGS += $(BACKTRACES) -DSYSDEBUG -O0 -fno-inline CONFIG_COPTS += -fsanitize=undefined +QUOTA ?= -C64 -L300 endif # System Five Mode @@ -200,7 +202,6 @@ CONFIG_CCFLAGS += \ -fno-align-labels \ -fno-align-loops \ -fschedule-insns2 \ - -fomit-frame-pointer \ -momit-leaf-frame-pointer \ -foptimize-sibling-calls \ -DDWARFLESS @@ -227,7 +228,6 @@ CONFIG_CCFLAGS += \ -fno-align-labels \ -fno-align-loops \ -fschedule-insns2 \ - -fno-omit-frame-pointer \ -momit-leaf-frame-pointer \ -foptimize-sibling-calls \ -DDWARFLESS diff --git a/build/definitions.mk b/build/definitions.mk index b0d99b23e..2c0f3dd39 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -84,13 +84,6 @@ PWD := $(shell build/bootstrap/pwd.com) IGNORE := $(shell $(ECHO) -2 ♥cosmo) IGNORE := $(shell $(MKDIR) o/tmp) -ifeq ($(MODE), dbg) -# be generous about resources in debug mode -# let commands use 64 seconds cpu time max -# let commands use 300 seconds wall time max -QUOTA ?= -C64 -L300 -endif - ifneq ($(findstring aarch64,$(MODE)),) ARCH = aarch64 VM = o/third_party/qemu/qemu-aarch64 @@ -124,7 +117,11 @@ GCC = $(PREFIX)gcc STRIP = $(PREFIX)strip OBJCOPY = $(PREFIX)objcopy OBJDUMP = $(PREFIX)objdump -ADDR2LINE = $(join $(PWD),$(PREFIX))addr2line +ifneq ($(wildcard $(PWD)/$(PREFIX)addr2line), ) +ADDR2LINE = $(PWD)/$(PREFIX)addr2line +else +ADDR2LINE = $(PREFIX)addr2line +endif export ADDR2LINE export LC_ALL @@ -146,7 +143,6 @@ IMAGE_BASE_VIRTUAL ?= 0x400000 endif BACKTRACES = \ - -fno-omit-frame-pointer \ -fno-optimize-sibling-calls \ -mno-omit-leaf-frame-pointer @@ -174,6 +170,7 @@ TRADITIONAL = \ DEFAULT_CCFLAGS += \ -Wall \ -Werror \ + -fno-omit-frame-pointer \ -fdebug-prefix-map='$(PWD)'= \ -frecord-gcc-switches diff --git a/libc/intrin/stpcpy.c b/libc/intrin/stpcpy.c index 4c19c92ed..0dab9b320 100644 --- a/libc/intrin/stpcpy.c +++ b/libc/intrin/stpcpy.c @@ -16,30 +16,14 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" #ifndef __aarch64__ -// TODO(jart): ASAN support here is important. - typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); -#ifdef __x86_64__ -static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) { - xmm_t v, z = {0}; - for (;;) { - v = *(xmm_t *)(s + i); - if (!__builtin_ia32_pmovmskb128(v == z)) { - *(xmm_u *)(d + i) = v; - i += 16; - } else { - break; - } - } - return i; -} -#endif - /** * Copies bytes from 𝑠 to 𝑑 until a NUL is encountered. * @@ -49,15 +33,27 @@ static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) { * @return pointer to nul byte * @asyncsignalsafe */ -char *stpcpy(char *d, const char *s) { +noasan char *stpcpy(char *d, const char *s) { size_t i = 0; + if (IsAsan()) { + __asan_verify(d, strlen(s) + 1); + } #ifdef __x86_64__ for (; (uintptr_t)(s + i) & 15; ++i) { if (!(d[i] = s[i])) { return d + i; } } - i = stpcpy_sse2(d, s, i); + for (;;) { + xmm_t z = {0}; + xmm_t v = *(xmm_t *)(s + i); + if (!__builtin_ia32_pmovmskb128(v == z)) { + *(xmm_u *)(d + i) = v; + i += 16; + } else { + break; + } + } #endif for (;;) { if (!(d[i] = s[i])) { diff --git a/libc/intrin/strcmp.c b/libc/intrin/strcmp.c index 574bd4a9b..c93cdfb00 100644 --- a/libc/intrin/strcmp.c +++ b/libc/intrin/strcmp.c @@ -29,11 +29,13 @@ * @return is <0, 0, or >0 based on uint8_t comparison * @asyncsignalsafe */ -int strcmp(const char *a, const char *b) { +noasan int strcmp(const char *a, const char *b) { int c; size_t i = 0; uint64_t v, w, d; if (a == b) return 0; + if (IsAsan()) __asan_verify_str(a); + if (IsAsan()) __asan_verify_str(b); if ((c = (*a & 255) - (*b & 255))) return c; if (!IsTiny() && ((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { for (; (uintptr_t)(a + i) & 7; ++i) { @@ -53,10 +55,6 @@ int strcmp(const char *a, const char *b) { } else { while (a[i] == b[i] && b[i]) ++i; } - if (IsAsan()) { - __asan_verify(a, i + 1); - __asan_verify(b, i + 1); - } return (a[i] & 255) - (b[i] & 255); } diff --git a/libc/intrin/strcpy.c b/libc/intrin/strcpy.c index fec4570d3..b4f4e907e 100644 --- a/libc/intrin/strcpy.c +++ b/libc/intrin/strcpy.c @@ -16,30 +16,15 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/intrin/kprintf.h" #include "libc/str/str.h" #ifndef __aarch64__ -// TODO(jart): ASAN support here is important. - typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); -#ifdef __x86_64__ -static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) { - xmm_t v, z = {0}; - for (;;) { - v = *(xmm_t *)(s + i); - if (!__builtin_ia32_pmovmskb128(v == z)) { - *(xmm_u *)(d + i) = v; - i += 16; - } else { - break; - } - } - return i; -} -#endif - /** * Copies bytes from 𝑠 to 𝑑 until a NUL is encountered. * @@ -49,15 +34,27 @@ static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) { * @return original dest * @asyncsignalsafe */ -char *strcpy(char *d, const char *s) { +noasan char *strcpy(char *d, const char *s) { size_t i = 0; + if (IsAsan()) { + __asan_verify(d, strlen(s) + 1); + } #ifdef __x86_64__ for (; (uintptr_t)(s + i) & 15; ++i) { if (!(d[i] = s[i])) { return d; } } - i = strcpy_sse2(d, s, i); + for (;;) { + xmm_t z = {0}; + xmm_t v = *(xmm_t *)(s + i); + if (!__builtin_ia32_pmovmskb128(v == z)) { + *(xmm_u *)(d + i) = v; + i += 16; + } else { + break; + } + } #endif for (;;) { if (!(d[i] = s[i])) { diff --git a/libc/str/strcasecmp.c b/libc/str/strcasecmp.c index 70e06e8ab..219f2c730 100644 --- a/libc/str/strcasecmp.c +++ b/libc/str/strcasecmp.c @@ -34,13 +34,11 @@ noasan int strcasecmp(const char *a, const char *b) { size_t i = 0; uint64_t v, w, d; if (a == b) return 0; + if (IsAsan()) __asan_verify_str(a); + if (IsAsan()) __asan_verify_str(b); if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { for (; (uintptr_t)(a + i) & 7; ++i) { CheckEm: - if (IsAsan()) { - __asan_verify(a, i + 1); - __asan_verify(b, i + 1); - } if ((x = kToLower[a[i] & 255]) != (y = kToLower[b[i] & 255]) || !y) { return x - y; } @@ -56,10 +54,6 @@ noasan int strcasecmp(const char *a, const char *b) { } } else { while ((x = kToLower[a[i] & 255]) == (y = kToLower[b[i] & 255]) && y) ++i; - if (IsAsan()) { - __asan_verify(a, i + 1); - __asan_verify(b, i + 1); - } return x - y; } } diff --git a/test/libc/release/test.mk b/test/libc/release/test.mk index 33eb41317..986a79f09 100644 --- a/test/libc/release/test.mk +++ b/test/libc/release/test.mk @@ -1,6 +1,8 @@ #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ +ifneq ($(MODE), dbg) +ifneq ($(MODE), asan) ifeq ($(ARCH), x86_64) o/$(MODE)/test/libc/release/cosmopolitan.zip: private .UNSANDBOXED = 1 @@ -198,6 +200,8 @@ o/$(MODE)/test/libc/release: \ o/$(MODE)/test/libc/release/smokeansi.com.runs endif +endif +endif .PHONY: o/$(MODE)/test/libc/release o/$(MODE)/test/libc/release: diff --git a/third_party/chibicc/chibicc.mk b/third_party/chibicc/chibicc.mk index eb7c8f5df..00bc01945 100644 --- a/third_party/chibicc/chibicc.mk +++ b/third_party/chibicc/chibicc.mk @@ -10,6 +10,8 @@ # This makefile compiles and runs each test twice. The first with # GCC-built chibicc, and a second time with chibicc-built chibicc +ifneq ($(MODE), dbg) +ifneq ($(MODE), asan) ifeq ($(ARCH), x86_64) CHIBICC = o/$(MODE)/third_party/chibicc/chibicc.com @@ -113,6 +115,8 @@ THIRD_PARTY_CHIBICC_CHECKS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x) THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS)) $(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk +endif +endif endif .PHONY: o/$(MODE)/third_party/chibicc diff --git a/third_party/chibicc/test/test.mk b/third_party/chibicc/test/test.mk index 19dffa25f..550424ac7 100644 --- a/third_party/chibicc/test/test.mk +++ b/third_party/chibicc/test/test.mk @@ -10,6 +10,8 @@ # This makefile compiles and runs each test twice. The first with # GCC-built chibicc, and a second time with chibicc-built chibicc +ifneq ($(MODE), dbg) +ifneq ($(MODE), asan) ifeq ($(ARCH), x86_64) PKGS += THIRD_PARTY_CHIBICC_TEST @@ -77,6 +79,8 @@ o/$(MODE)/third_party/chibicc/test/%.o: \ o/$(MODE)/third_party/chibicc/test/int128_test.o: private QUOTA = -M1024m +endif +endif endif .PHONY: o/$(MODE)/third_party/chibicc/test diff --git a/third_party/ggml/ggml.mk b/third_party/ggml/ggml.mk index 6e59f8ec5..d19eeb079 100644 --- a/third_party/ggml/ggml.mk +++ b/third_party/ggml/ggml.mk @@ -185,6 +185,7 @@ o/$(MODE)/third_party/ggml/companionai.txt.zip.o: private \ -B o/$(MODE)/third_party/ggml/ggml.o: private QUOTA = -C64 +o/$(MODE)/third_party/ggml/llama.o: private QUOTA = -C64 ################################################################################ diff --git a/third_party/ggml/llama.cc b/third_party/ggml/llama.cc index e18a2d727..64a87e7ea 100644 --- a/third_party/ggml/llama.cc +++ b/third_party/ggml/llama.cc @@ -2660,13 +2660,14 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { rng_ss << ctx->rng; const size_t rng_size = rng_ss.str().size(); - char rng_buf[LLAMA_MAX_RNG_STATE]; + llama_buffer rng_buf; + rng_buf.resize(LLAMA_MAX_RNG_STATE); - memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE); - memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size()); + memset(&rng_buf.addr[0], 0, LLAMA_MAX_RNG_STATE); + memcpy(&rng_buf.addr[0], rng_ss.str().data(), rng_ss.str().size()); memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size); - memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE; + memcpy(out, &rng_buf.addr[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE; } // copy logits @@ -2759,13 +2760,14 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { // set rng { size_t rng_size; - char rng_buf[LLAMA_MAX_RNG_STATE]; + llama_buffer rng_buf; + rng_buf.resize(LLAMA_MAX_RNG_STATE); - memcpy(&rng_size, in, sizeof(rng_size)); in += sizeof(rng_size); - memcpy(&rng_buf[0], in, LLAMA_MAX_RNG_STATE); in += LLAMA_MAX_RNG_STATE; + memcpy(&rng_size, in, sizeof(rng_size)); in += sizeof(rng_size); + memcpy(&rng_buf.addr[0], in, LLAMA_MAX_RNG_STATE); in += LLAMA_MAX_RNG_STATE; std::stringstream rng_ss; - rng_ss.str(std::string(&rng_buf[0], rng_size)); + rng_ss.str(std::string((char *)&rng_buf.addr[0], rng_size)); rng_ss >> ctx->rng; LLAMA_ASSERT(rng_ss.fail() == false); diff --git a/third_party/sqlite3/sqlite3.mk b/third_party/sqlite3/sqlite3.mk index 098233c39..9b433a33d 100644 --- a/third_party/sqlite3/sqlite3.mk +++ b/third_party/sqlite3/sqlite3.mk @@ -190,11 +190,11 @@ o/$(MODE)/third_party/sqlite3/parse.o: private \ CFLAGS += \ -fpie -o/$(MODE)/third_party/sqlite3/shell.o: private QUOTA = -M512m -C16 -L180 +o/$(MODE)/third_party/sqlite3/shell.o: private QUOTA = -M512m -C32 -L180 o/$(MODE)/third_party/sqlite3/vdbe.o: private QUOTA = -M1024m o/$(MODE)/third_party/sqlite3/vdbe.shell.o: private QUOTA = -M1024m -o/$(MODE)/third_party/sqlite3/fts5.o: private QUOTA = -M512m -C16 -o/$(MODE)/third_party/sqlite3/fts5.shell.o: private QUOTA = -M512m -C16 -L180 +o/$(MODE)/third_party/sqlite3/fts5.o: private QUOTA = -M512m -C32 +o/$(MODE)/third_party/sqlite3/fts5.shell.o: private QUOTA = -M512m -C32 -L180 o/$(MODE)/third_party/sqlite3/rtree.o: \ third_party/sqlite3/rtree.c \