Fix stack abuse in llama.cc

This change also incorporates improvements for MODE=asan. It has been
confirmed that o/asan/third_party/ggml/llama.com now works.

Fixes #829
This commit is contained in:
Justine Tunney 2023-06-08 06:44:54 -07:00
parent 32682f0ce7
commit 4d629fd424
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
12 changed files with 73 additions and 76 deletions

View file

@ -33,7 +33,6 @@ endif
ifeq ($(MODE), zero) ifeq ($(MODE), zero)
OVERRIDE_CFLAGS += -O0 OVERRIDE_CFLAGS += -O0
OVERRIDE_CXXFLAGS += -O0 OVERRIDE_CXXFLAGS += -O0
OVERRIDE_CCFLAGS = -fno-omit-frame-pointer
CONFIG_CPPFLAGS += -DSYSDEBUG CONFIG_CPPFLAGS += -DSYSDEBUG
endif endif
ifeq ($(MODE), aarch64-zero) ifeq ($(MODE), aarch64-zero)
@ -132,6 +131,7 @@ ifeq ($(MODE), asan)
CONFIG_CCFLAGS += $(BACKTRACES) -O2 -DSYSDEBUG CONFIG_CCFLAGS += $(BACKTRACES) -O2 -DSYSDEBUG
CONFIG_COPTS += -fsanitize=address CONFIG_COPTS += -fsanitize=address
TARGET_ARCH ?= -msse3 TARGET_ARCH ?= -msse3
QUOTA ?= -C64 -L300
endif endif
# Debug Mode # Debug Mode
@ -151,12 +151,14 @@ CONFIG_CCFLAGS += $(BACKTRACES) -DSYSDEBUG -O0 -fno-inline
CONFIG_COPTS += -fsanitize=address -fsanitize=undefined CONFIG_COPTS += -fsanitize=address -fsanitize=undefined
TARGET_ARCH ?= -msse3 TARGET_ARCH ?= -msse3
OVERRIDE_CCFLAGS += -fno-pie OVERRIDE_CCFLAGS += -fno-pie
QUOTA ?= -C64 -L300
endif endif
ifeq ($(MODE), aarch64-dbg) ifeq ($(MODE), aarch64-dbg)
ENABLE_FTRACE = 1 ENABLE_FTRACE = 1
CONFIG_CPPFLAGS += -DMODE_DBG CONFIG_CPPFLAGS += -DMODE_DBG
CONFIG_CCFLAGS += $(BACKTRACES) -DSYSDEBUG -O0 -fno-inline CONFIG_CCFLAGS += $(BACKTRACES) -DSYSDEBUG -O0 -fno-inline
CONFIG_COPTS += -fsanitize=undefined CONFIG_COPTS += -fsanitize=undefined
QUOTA ?= -C64 -L300
endif endif
# System Five Mode # System Five Mode
@ -200,7 +202,6 @@ CONFIG_CCFLAGS += \
-fno-align-labels \ -fno-align-labels \
-fno-align-loops \ -fno-align-loops \
-fschedule-insns2 \ -fschedule-insns2 \
-fomit-frame-pointer \
-momit-leaf-frame-pointer \ -momit-leaf-frame-pointer \
-foptimize-sibling-calls \ -foptimize-sibling-calls \
-DDWARFLESS -DDWARFLESS
@ -227,7 +228,6 @@ CONFIG_CCFLAGS += \
-fno-align-labels \ -fno-align-labels \
-fno-align-loops \ -fno-align-loops \
-fschedule-insns2 \ -fschedule-insns2 \
-fno-omit-frame-pointer \
-momit-leaf-frame-pointer \ -momit-leaf-frame-pointer \
-foptimize-sibling-calls \ -foptimize-sibling-calls \
-DDWARFLESS -DDWARFLESS

View file

@ -84,13 +84,6 @@ PWD := $(shell build/bootstrap/pwd.com)
IGNORE := $(shell $(ECHO) -2 ♥cosmo) IGNORE := $(shell $(ECHO) -2 ♥cosmo)
IGNORE := $(shell $(MKDIR) o/tmp) IGNORE := $(shell $(MKDIR) o/tmp)
ifeq ($(MODE), dbg)
# be generous about resources in debug mode
# let commands use 64 seconds cpu time max
# let commands use 300 seconds wall time max
QUOTA ?= -C64 -L300
endif
ifneq ($(findstring aarch64,$(MODE)),) ifneq ($(findstring aarch64,$(MODE)),)
ARCH = aarch64 ARCH = aarch64
VM = o/third_party/qemu/qemu-aarch64 VM = o/third_party/qemu/qemu-aarch64
@ -124,7 +117,11 @@ GCC = $(PREFIX)gcc
STRIP = $(PREFIX)strip STRIP = $(PREFIX)strip
OBJCOPY = $(PREFIX)objcopy OBJCOPY = $(PREFIX)objcopy
OBJDUMP = $(PREFIX)objdump OBJDUMP = $(PREFIX)objdump
ADDR2LINE = $(join $(PWD),$(PREFIX))addr2line ifneq ($(wildcard $(PWD)/$(PREFIX)addr2line), )
ADDR2LINE = $(PWD)/$(PREFIX)addr2line
else
ADDR2LINE = $(PREFIX)addr2line
endif
export ADDR2LINE export ADDR2LINE
export LC_ALL export LC_ALL
@ -146,7 +143,6 @@ IMAGE_BASE_VIRTUAL ?= 0x400000
endif endif
BACKTRACES = \ BACKTRACES = \
-fno-omit-frame-pointer \
-fno-optimize-sibling-calls \ -fno-optimize-sibling-calls \
-mno-omit-leaf-frame-pointer -mno-omit-leaf-frame-pointer
@ -174,6 +170,7 @@ TRADITIONAL = \
DEFAULT_CCFLAGS += \ DEFAULT_CCFLAGS += \
-Wall \ -Wall \
-Werror \ -Werror \
-fno-omit-frame-pointer \
-fdebug-prefix-map='$(PWD)'= \ -fdebug-prefix-map='$(PWD)'= \
-frecord-gcc-switches -frecord-gcc-switches

View file

@ -16,30 +16,14 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#ifndef __aarch64__ #ifndef __aarch64__
// TODO(jart): ASAN support here is important.
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
#ifdef __x86_64__
static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
xmm_t v, z = {0};
for (;;) {
v = *(xmm_t *)(s + i);
if (!__builtin_ia32_pmovmskb128(v == z)) {
*(xmm_u *)(d + i) = v;
i += 16;
} else {
break;
}
}
return i;
}
#endif
/** /**
* Copies bytes from 𝑠 to 𝑑 until a NUL is encountered. * Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
* *
@ -49,15 +33,27 @@ static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
* @return pointer to nul byte * @return pointer to nul byte
* @asyncsignalsafe * @asyncsignalsafe
*/ */
char *stpcpy(char *d, const char *s) { noasan char *stpcpy(char *d, const char *s) {
size_t i = 0; size_t i = 0;
if (IsAsan()) {
__asan_verify(d, strlen(s) + 1);
}
#ifdef __x86_64__ #ifdef __x86_64__
for (; (uintptr_t)(s + i) & 15; ++i) { for (; (uintptr_t)(s + i) & 15; ++i) {
if (!(d[i] = s[i])) { if (!(d[i] = s[i])) {
return d + i; return d + i;
} }
} }
i = stpcpy_sse2(d, s, i); for (;;) {
xmm_t z = {0};
xmm_t v = *(xmm_t *)(s + i);
if (!__builtin_ia32_pmovmskb128(v == z)) {
*(xmm_u *)(d + i) = v;
i += 16;
} else {
break;
}
}
#endif #endif
for (;;) { for (;;) {
if (!(d[i] = s[i])) { if (!(d[i] = s[i])) {

View file

@ -29,11 +29,13 @@
* @return is <0, 0, or >0 based on uint8_t comparison * @return is <0, 0, or >0 based on uint8_t comparison
* @asyncsignalsafe * @asyncsignalsafe
*/ */
int strcmp(const char *a, const char *b) { noasan int strcmp(const char *a, const char *b) {
int c; int c;
size_t i = 0; size_t i = 0;
uint64_t v, w, d; uint64_t v, w, d;
if (a == b) return 0; if (a == b) return 0;
if (IsAsan()) __asan_verify_str(a);
if (IsAsan()) __asan_verify_str(b);
if ((c = (*a & 255) - (*b & 255))) return c; if ((c = (*a & 255) - (*b & 255))) return c;
if (!IsTiny() && ((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { if (!IsTiny() && ((uintptr_t)a & 7) == ((uintptr_t)b & 7)) {
for (; (uintptr_t)(a + i) & 7; ++i) { for (; (uintptr_t)(a + i) & 7; ++i) {
@ -53,10 +55,6 @@ int strcmp(const char *a, const char *b) {
} else { } else {
while (a[i] == b[i] && b[i]) ++i; while (a[i] == b[i] && b[i]) ++i;
} }
if (IsAsan()) {
__asan_verify(a, i + 1);
__asan_verify(b, i + 1);
}
return (a[i] & 255) - (b[i] & 255); return (a[i] & 255) - (b[i] & 255);
} }

View file

@ -16,30 +16,15 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#ifndef __aarch64__ #ifndef __aarch64__
// TODO(jart): ASAN support here is important.
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
#ifdef __x86_64__
static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
xmm_t v, z = {0};
for (;;) {
v = *(xmm_t *)(s + i);
if (!__builtin_ia32_pmovmskb128(v == z)) {
*(xmm_u *)(d + i) = v;
i += 16;
} else {
break;
}
}
return i;
}
#endif
/** /**
* Copies bytes from 𝑠 to 𝑑 until a NUL is encountered. * Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
* *
@ -49,15 +34,27 @@ static inline noasan size_t strcpy_sse2(char *d, const char *s, size_t i) {
* @return original dest * @return original dest
* @asyncsignalsafe * @asyncsignalsafe
*/ */
char *strcpy(char *d, const char *s) { noasan char *strcpy(char *d, const char *s) {
size_t i = 0; size_t i = 0;
if (IsAsan()) {
__asan_verify(d, strlen(s) + 1);
}
#ifdef __x86_64__ #ifdef __x86_64__
for (; (uintptr_t)(s + i) & 15; ++i) { for (; (uintptr_t)(s + i) & 15; ++i) {
if (!(d[i] = s[i])) { if (!(d[i] = s[i])) {
return d; return d;
} }
} }
i = strcpy_sse2(d, s, i); for (;;) {
xmm_t z = {0};
xmm_t v = *(xmm_t *)(s + i);
if (!__builtin_ia32_pmovmskb128(v == z)) {
*(xmm_u *)(d + i) = v;
i += 16;
} else {
break;
}
}
#endif #endif
for (;;) { for (;;) {
if (!(d[i] = s[i])) { if (!(d[i] = s[i])) {

View file

@ -34,13 +34,11 @@ noasan int strcasecmp(const char *a, const char *b) {
size_t i = 0; size_t i = 0;
uint64_t v, w, d; uint64_t v, w, d;
if (a == b) return 0; if (a == b) return 0;
if (IsAsan()) __asan_verify_str(a);
if (IsAsan()) __asan_verify_str(b);
if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) {
for (; (uintptr_t)(a + i) & 7; ++i) { for (; (uintptr_t)(a + i) & 7; ++i) {
CheckEm: CheckEm:
if (IsAsan()) {
__asan_verify(a, i + 1);
__asan_verify(b, i + 1);
}
if ((x = kToLower[a[i] & 255]) != (y = kToLower[b[i] & 255]) || !y) { if ((x = kToLower[a[i] & 255]) != (y = kToLower[b[i] & 255]) || !y) {
return x - y; return x - y;
} }
@ -56,10 +54,6 @@ noasan int strcasecmp(const char *a, const char *b) {
} }
} else { } else {
while ((x = kToLower[a[i] & 255]) == (y = kToLower[b[i] & 255]) && y) ++i; while ((x = kToLower[a[i] & 255]) == (y = kToLower[b[i] & 255]) && y) ++i;
if (IsAsan()) {
__asan_verify(a, i + 1);
__asan_verify(b, i + 1);
}
return x - y; return x - y;
} }
} }

View file

@ -1,6 +1,8 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐ #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘ #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
ifneq ($(MODE), dbg)
ifneq ($(MODE), asan)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
o/$(MODE)/test/libc/release/cosmopolitan.zip: private .UNSANDBOXED = 1 o/$(MODE)/test/libc/release/cosmopolitan.zip: private .UNSANDBOXED = 1
@ -198,6 +200,8 @@ o/$(MODE)/test/libc/release: \
o/$(MODE)/test/libc/release/smokeansi.com.runs o/$(MODE)/test/libc/release/smokeansi.com.runs
endif endif
endif
endif
.PHONY: o/$(MODE)/test/libc/release .PHONY: o/$(MODE)/test/libc/release
o/$(MODE)/test/libc/release: o/$(MODE)/test/libc/release:

View file

@ -10,6 +10,8 @@
# This makefile compiles and runs each test twice. The first with # This makefile compiles and runs each test twice. The first with
# GCC-built chibicc, and a second time with chibicc-built chibicc # GCC-built chibicc, and a second time with chibicc-built chibicc
ifneq ($(MODE), dbg)
ifneq ($(MODE), asan)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
CHIBICC = o/$(MODE)/third_party/chibicc/chibicc.com CHIBICC = o/$(MODE)/third_party/chibicc/chibicc.com
@ -113,6 +115,8 @@ THIRD_PARTY_CHIBICC_CHECKS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)
THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS)) THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS))
$(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk $(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk
endif
endif
endif endif
.PHONY: o/$(MODE)/third_party/chibicc .PHONY: o/$(MODE)/third_party/chibicc

View file

@ -10,6 +10,8 @@
# This makefile compiles and runs each test twice. The first with # This makefile compiles and runs each test twice. The first with
# GCC-built chibicc, and a second time with chibicc-built chibicc # GCC-built chibicc, and a second time with chibicc-built chibicc
ifneq ($(MODE), dbg)
ifneq ($(MODE), asan)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
PKGS += THIRD_PARTY_CHIBICC_TEST PKGS += THIRD_PARTY_CHIBICC_TEST
@ -77,6 +79,8 @@ o/$(MODE)/third_party/chibicc/test/%.o: \
o/$(MODE)/third_party/chibicc/test/int128_test.o: private QUOTA = -M1024m o/$(MODE)/third_party/chibicc/test/int128_test.o: private QUOTA = -M1024m
endif
endif
endif endif
.PHONY: o/$(MODE)/third_party/chibicc/test .PHONY: o/$(MODE)/third_party/chibicc/test

View file

@ -185,6 +185,7 @@ o/$(MODE)/third_party/ggml/companionai.txt.zip.o: private \
-B -B
o/$(MODE)/third_party/ggml/ggml.o: private QUOTA = -C64 o/$(MODE)/third_party/ggml/ggml.o: private QUOTA = -C64
o/$(MODE)/third_party/ggml/llama.o: private QUOTA = -C64
################################################################################ ################################################################################

View file

@ -2660,13 +2660,14 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
rng_ss << ctx->rng; rng_ss << ctx->rng;
const size_t rng_size = rng_ss.str().size(); const size_t rng_size = rng_ss.str().size();
char rng_buf[LLAMA_MAX_RNG_STATE]; llama_buffer rng_buf;
rng_buf.resize(LLAMA_MAX_RNG_STATE);
memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE); memset(&rng_buf.addr[0], 0, LLAMA_MAX_RNG_STATE);
memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size()); memcpy(&rng_buf.addr[0], rng_ss.str().data(), rng_ss.str().size());
memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size); memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size);
memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE; memcpy(out, &rng_buf.addr[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE;
} }
// copy logits // copy logits
@ -2759,13 +2760,14 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) {
// set rng // set rng
{ {
size_t rng_size; size_t rng_size;
char rng_buf[LLAMA_MAX_RNG_STATE]; llama_buffer rng_buf;
rng_buf.resize(LLAMA_MAX_RNG_STATE);
memcpy(&rng_size, in, sizeof(rng_size)); in += sizeof(rng_size); memcpy(&rng_size, in, sizeof(rng_size)); in += sizeof(rng_size);
memcpy(&rng_buf[0], in, LLAMA_MAX_RNG_STATE); in += LLAMA_MAX_RNG_STATE; memcpy(&rng_buf.addr[0], in, LLAMA_MAX_RNG_STATE); in += LLAMA_MAX_RNG_STATE;
std::stringstream rng_ss; std::stringstream rng_ss;
rng_ss.str(std::string(&rng_buf[0], rng_size)); rng_ss.str(std::string((char *)&rng_buf.addr[0], rng_size));
rng_ss >> ctx->rng; rng_ss >> ctx->rng;
LLAMA_ASSERT(rng_ss.fail() == false); LLAMA_ASSERT(rng_ss.fail() == false);

View file

@ -190,11 +190,11 @@ o/$(MODE)/third_party/sqlite3/parse.o: private \
CFLAGS += \ CFLAGS += \
-fpie -fpie
o/$(MODE)/third_party/sqlite3/shell.o: private QUOTA = -M512m -C16 -L180 o/$(MODE)/third_party/sqlite3/shell.o: private QUOTA = -M512m -C32 -L180
o/$(MODE)/third_party/sqlite3/vdbe.o: private QUOTA = -M1024m o/$(MODE)/third_party/sqlite3/vdbe.o: private QUOTA = -M1024m
o/$(MODE)/third_party/sqlite3/vdbe.shell.o: private QUOTA = -M1024m o/$(MODE)/third_party/sqlite3/vdbe.shell.o: private QUOTA = -M1024m
o/$(MODE)/third_party/sqlite3/fts5.o: private QUOTA = -M512m -C16 o/$(MODE)/third_party/sqlite3/fts5.o: private QUOTA = -M512m -C32
o/$(MODE)/third_party/sqlite3/fts5.shell.o: private QUOTA = -M512m -C16 -L180 o/$(MODE)/third_party/sqlite3/fts5.shell.o: private QUOTA = -M512m -C32 -L180
o/$(MODE)/third_party/sqlite3/rtree.o: \ o/$(MODE)/third_party/sqlite3/rtree.o: \
third_party/sqlite3/rtree.c \ third_party/sqlite3/rtree.c \