kbuild: add support for Clang LTO

This change adds build system support for Clang's Link Time
Optimization (LTO). With -flto, instead of ELF object files, Clang
produces LLVM bitcode, which is compiled into native code at link
time, allowing the final binary to be optimized globally. For more
details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

The Kconfig option CONFIG_LTO_CLANG is implemented as a choice,
which defaults to LTO being disabled. To use LTO, the architecture
must select ARCH_SUPPORTS_LTO_CLANG and support:

  - compiling with Clang,
  - compiling all assembly code with Clang's integrated assembler,
  - and linking with LLD.

While using CONFIG_LTO_CLANG_FULL results in the best runtime
performance, the compilation is not scalable in time or
memory. CONFIG_LTO_CLANG_THIN enables ThinLTO, which allows
parallel optimization and faster incremental builds. ThinLTO is
used by default if the architecture also selects
ARCH_SUPPORTS_LTO_CLANG_THIN:

  https://clang.llvm.org/docs/ThinLTO.html

To enable LTO, LLVM tools must be used to handle bitcode files, by
passing LLVM=1 and LLVM_IAS=1 options to make:

  $ make LLVM=1 LLVM_IAS=1 defconfig
  $ scripts/config -e LTO_CLANG_THIN
  $ make LLVM=1 LLVM_IAS=1

To prepare for LTO support with other compilers, common parts are
gated behind the CONFIG_LTO option, and LTO can be disabled for
specific files by filtering out CC_FLAGS_LTO.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201211184633.3213045-3-samitolvanen@google.com
This commit is contained in:
Sami Tolvanen 2020-12-11 10:46:19 -08:00 committed by Kees Cook
parent 3b15cdc159
commit dc5723b02e
7 changed files with 174 additions and 18 deletions

View File

@ -893,6 +893,21 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif
ifdef CONFIG_LTO_CLANG
ifdef CONFIG_LTO_CLANG_THIN
CC_FLAGS_LTO += -flto=thin -fsplit-lto-unit
KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache
else
CC_FLAGS_LTO += -flto
endif
CC_FLAGS_LTO += -fvisibility=hidden
endif
ifdef CONFIG_LTO
KBUILD_CFLAGS += $(CC_FLAGS_LTO)
export CC_FLAGS_LTO
endif
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B
KBUILD_CFLAGS += -falign-functions=32
endif
@ -1479,7 +1494,7 @@ MRPROPER_FILES += include/config include/generated \
*.spec
# Directories & files removed with 'make distclean'
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache
# clean - Delete most, but leave enough to build external modules
#
@ -1725,7 +1740,7 @@ PHONY += compile_commands.json
clean-dirs := $(KBUILD_EXTMOD)
clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \
$(KBUILD_EXTMOD)/compile_commands.json
$(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache
PHONY += help
help:

View File

@ -631,6 +631,97 @@ config SHADOW_CALL_STACK
reading and writing arbitrary memory may be able to locate them
and hijack control flow by modifying the stacks.
config LTO
bool
help
Selected if the kernel will be built using the compiler's LTO feature.
config LTO_CLANG
bool
select LTO
help
Selected if the kernel will be built using Clang's LTO feature.
config ARCH_SUPPORTS_LTO_CLANG
bool
help
An architecture should select this option if it supports:
- compiling with Clang,
- compiling inline assembly with Clang's integrated assembler,
- and linking with LLD.
config ARCH_SUPPORTS_LTO_CLANG_THIN
bool
help
An architecture should select this option if it can support Clang's
ThinLTO mode.
config HAS_LTO_CLANG
def_bool y
# Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510
depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD
depends on $(success,test $(LLVM) -eq 1)
depends on $(success,test $(LLVM_IAS) -eq 1)
depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm)
depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm)
depends on ARCH_SUPPORTS_LTO_CLANG
depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT
depends on !KASAN
depends on !GCOV_KERNEL
depends on !MODVERSIONS
help
The compiler and Kconfig options support building with Clang's
LTO.
choice
prompt "Link Time Optimization (LTO)"
default LTO_NONE
help
This option enables Link Time Optimization (LTO), which allows the
compiler to optimize binaries globally.
If unsure, select LTO_NONE. Note that LTO is very resource-intensive
so it's disabled by default.
config LTO_NONE
bool "None"
help
Build the kernel normally, without Link Time Optimization (LTO).
config LTO_CLANG_FULL
bool "Clang Full LTO (EXPERIMENTAL)"
depends on HAS_LTO_CLANG
depends on !COMPILE_TEST
select LTO_CLANG
help
This option enables Clang's full Link Time Optimization (LTO), which
allows the compiler to optimize the kernel globally. If you enable
this option, the compiler generates LLVM bitcode instead of ELF
object files, and the actual compilation from bitcode happens at
the LTO link step, which may take several minutes depending on the
kernel configuration. More information can be found from LLVM's
documentation:
https://llvm.org/docs/LinkTimeOptimization.html
During link time, this option can use a large amount of RAM, and
may take much longer than the ThinLTO option.
config LTO_CLANG_THIN
bool "Clang ThinLTO (EXPERIMENTAL)"
depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
select LTO_CLANG
help
This option enables Clang's ThinLTO, which allows for parallel
optimization and faster incremental compiles compared to the
CONFIG_LTO_CLANG_FULL option. More information can be found
from Clang's documentation:
https://clang.llvm.org/docs/ThinLTO.html
If unsure, say Y.
endchoice
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help

View File

@ -90,15 +90,18 @@
* .data. We don't want to pull in .data..other sections, which Linux
* has defined. Same for text and bss.
*
* With LTO_CLANG, the linker also splits sections by default, so we need
* these macros to combine the sections during the final link.
*
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral*
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
#else
#define TEXT_MAIN .text

View File

@ -111,7 +111,7 @@ endif
# ---------------------------------------------------------------------------
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $<
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $<
$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)
@ -421,8 +421,15 @@ $(obj)/lib.a: $(lib-y) FORCE
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
# module is turned into a multi object module, $^ will contain header file
# dependencies recorded in the .*.cmd file.
ifdef CONFIG_LTO_CLANG
quiet_cmd_link_multi-m = AR [M] $@
cmd_link_multi-m = \
rm -f $@; \
$(AR) cDPrsT $@ $(filter %.o,$^)
else
quiet_cmd_link_multi-m = LD [M] $@
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
endif
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)

View File

@ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M] $@
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
ifdef CONFIG_LTO_CLANG
# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to
# avoid a second slow LTO link
prelink-ext := .lto
endif
quiet_cmd_ld_ko_o = LD [M] $@
cmd_ld_ko_o = \
$(LD) -r $(KBUILD_LDFLAGS) \
@ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \
$(cmd); \
printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
# Re-generate module BTFs if either module's .ko or vmlinux changed
$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
+$(call if_changed_except,ld_ko_o,vmlinux)
ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+$(if $(newer-prereqs),$(call cmd,btf_ko))

View File

@ -43,6 +43,9 @@ __modpost:
include include/config/auto.conf
include scripts/Kbuild.include
# for ld_flags
include scripts/Makefile.lib
MODPOST = scripts/mod/modpost \
$(if $(CONFIG_MODVERSIONS),-m) \
$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \
@ -102,12 +105,26 @@ $(input-symdump):
@echo >&2 'WARNING: Symbol version dump "$@" is missing.'
@echo >&2 ' Modules may not have dependencies or modversions.'
ifdef CONFIG_LTO_CLANG
# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run
# LTO to compile them into native code before running modpost
prelink-ext := .lto
quiet_cmd_cc_lto_link_modules = LTO [M] $@
cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^
%.lto.o: %.o
$(call if_changed,cc_lto_link_modules)
endif
modules := $(sort $(shell cat $(MODORDER)))
# Read out modules.order to pass in modpost.
# Otherwise, allmodconfig would fail with "Argument list too long".
quiet_cmd_modpost = MODPOST $@
cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T -
cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T -
$(output-symdump): $(MODORDER) $(input-symdump) FORCE
$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE
$(call if_changed,modpost)
targets += $(output-symdump)

View File

@ -56,6 +56,14 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# This might take a while, so indicate that we're doing
# an LTO link
info LTO ${1}
else
info LD ${1}
fi
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
}
@ -103,13 +111,22 @@ vmlinux_link()
fi
if [ "${SRCARCH}" != "um" ]; then
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# Use vmlinux.o instead of performing the slow LTO
# link again.
objects="--whole-archive \
vmlinux.o \
--no-whole-archive \
${@}"
else
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
fi
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
${strip_debug#-Wl,} \
@ -274,7 +291,6 @@ fi;
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
#link vmlinux.o
info LD vmlinux.o
modpost_link vmlinux.o
objtool_link vmlinux.o