mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-26 12:26:11 +00:00
Merge patch series "Introduce 64b relocatable kernel"
Alexandre Ghiti <alexghiti@rivosinc.com> says: After multiple attempts, this patchset is now based on the fact that the 64b kernel mapping was moved outside the linear mapping. The first patch allows building relocatable kernels, but the option is not selected by default. That patch is a requirement for KASLR. The second and third patches take advantage of an already existing powerpc script that checks relocations at compile time, and use it for riscv. * b4-shazam-merge: riscv: Use --emit-relocs in order to move .rela.dyn in init; riscv: Check relocations at compile time; powerpc: Move script to check relocations at compile time in scripts/; riscv: Introduce CONFIG_RELOCATABLE; riscv: Move .rela.dyn outside of init to avoid empty relocations; riscv: Prepare EFI header for relocatable kernels. Link: https://lore.kernel.org/r/20230329045329.64565-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
This commit is contained in:
commit
310c33dc7a
13 changed files with 221 additions and 31 deletions
|
@ -15,21 +15,8 @@ if [ $# -lt 3 ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
|
||||
objdump="$1"
|
||||
nm="$2"
|
||||
vmlinux="$3"
|
||||
|
||||
# Remove from the bad relocations those that match an undefined weak symbol
|
||||
# which will result in an absolute relocation to 0.
|
||||
# Weak unresolved symbols are of that form in nm output:
|
||||
# " w _binary__btf_vmlinux_bin_end"
|
||||
undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
|
||||
|
||||
bad_relocs=$(
|
||||
$objdump -R "$vmlinux" |
|
||||
# Only look at relocation lines.
|
||||
grep -E '\<R_' |
|
||||
${srctree}/scripts/relocs_check.sh "$@" |
|
||||
# These relocations are okay
|
||||
# On PPC64:
|
||||
# R_PPC64_RELATIVE, R_PPC64_NONE
|
||||
|
@ -44,8 +31,7 @@ R_PPC_ADDR16_LO
|
|||
R_PPC_ADDR16_HI
|
||||
R_PPC_ADDR16_HA
|
||||
R_PPC_RELATIVE
|
||||
R_PPC_NONE' |
|
||||
([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
|
||||
R_PPC_NONE'
|
||||
)
|
||||
|
||||
if [ -z "$bad_relocs" ]; then
|
||||
|
|
|
@ -603,6 +603,20 @@ config COMPAT
|
|||
|
||||
If you want to execute 32-bit userspace applications, say Y.
|
||||
|
||||
config RELOCATABLE
|
||||
bool "Build a relocatable kernel"
|
||||
depends on MMU && 64BIT && !XIP_KERNEL
|
||||
help
|
||||
This builds a kernel as a Position Independent Executable (PIE),
|
||||
which retains all relocation metadata required to relocate the
|
||||
kernel binary at runtime to a different virtual address than the
|
||||
address it was linked at.
|
||||
Since RISCV uses the RELA relocation format, this requires a
|
||||
relocation pass at runtime even if the kernel is loaded at the
|
||||
same address it was linked at.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
endmenu # "Kernel features"
|
||||
|
||||
menu "Boot options"
|
||||
|
|
|
@ -7,9 +7,12 @@
|
|||
#
|
||||
|
||||
OBJCOPYFLAGS := -O binary
|
||||
LDFLAGS_vmlinux :=
|
||||
ifeq ($(CONFIG_RELOCATABLE),y)
|
||||
LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro --emit-relocs
|
||||
KBUILD_CFLAGS += -fPIE
|
||||
endif
|
||||
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
|
||||
LDFLAGS_vmlinux := --no-relax
|
||||
LDFLAGS_vmlinux += --no-relax
|
||||
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
|
||||
ifeq ($(CONFIG_RISCV_ISA_C),y)
|
||||
CC_FLAGS_FTRACE := -fpatchable-function-entry=4
|
||||
|
|
49
arch/riscv/Makefile.postlink
Normal file
49
arch/riscv/Makefile.postlink
Normal file
|
@ -0,0 +1,49 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
# ===========================================================================
|
||||
# Post-link riscv pass
|
||||
# ===========================================================================
|
||||
#
|
||||
# Check that vmlinux relocations look sane
|
||||
|
||||
PHONY := __archpost
|
||||
__archpost:
|
||||
|
||||
-include include/config/auto.conf
|
||||
include $(srctree)/scripts/Kbuild.include
|
||||
|
||||
# Command: run the riscv relocation checker on the current target ($@),
# handing it the objdump and nm paths taken from $(OBJDUMP) and $(NM).
quiet_cmd_relocs_check = CHKREL $@
|
||||
cmd_relocs_check = \
|
||||
$(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
|
||||
|
||||
ifdef CONFIG_RELOCATABLE
|
||||
# Command: copy vmlinux to vmlinux.relocs. Both file names are hard-coded
|
# here rather than derived from $@.
quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs
|
||||
cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs
|
||||
|
||||
# Command: rewrite the target in place with objcopy, removing every section
# whose name matches '.rel.*', '.rel__*', '.rela.*' or '.rela__*'.
quiet_cmd_relocs_strip = STRIPREL $@
|
||||
cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \
|
||||
--remove-section='.rel__*' \
|
||||
--remove-section='.rela.*' \
|
||||
--remove-section='.rela__*' $@
|
||||
endif
|
||||
|
||||
# Post-link step for vmlinux.
# `@true` prevents complaint when there is nothing to be done
# With CONFIG_RELOCATABLE the recipe runs three commands in this order:
#   1. relocs_check      - run the relocation checker on vmlinux
#   2. cp_vmlinux_relocs - copy vmlinux to vmlinux.relocs
#   3. relocs_strip      - strip the .rel*/.rela* sections from vmlinux
# The copy is taken before the strip, so vmlinux.relocs still carries the
# relocation sections that are removed from vmlinux itself.
# NOTE(review): if_changed is invoked three times for the same target. Each
# call records its command line for that target, so later calls presumably
# overwrite what earlier ones saved - confirm whether $(call cmd,...) is the
# intended form for the second and third steps.
|
||||
|
||||
vmlinux: FORCE
|
||||
@true
|
||||
ifdef CONFIG_RELOCATABLE
|
||||
$(call if_changed,relocs_check)
|
||||
$(call if_changed,cp_vmlinux_relocs)
|
||||
$(call if_changed,relocs_strip)
|
||||
endif
|
||||
|
||||
%.ko: FORCE
|
||||
@true
|
||||
|
||||
clean:
|
||||
@true
|
||||
|
||||
PHONY += FORCE clean
|
||||
|
||||
FORCE:
|
||||
|
||||
.PHONY: $(PHONY)
|
|
@ -33,7 +33,14 @@ $(obj)/xipImage: vmlinux FORCE
|
|||
|
||||
endif
|
||||
|
||||
ifdef CONFIG_RELOCATABLE
|
||||
vmlinux.relocs: vmlinux
|
||||
@ (! [ -f vmlinux.relocs ] && echo "vmlinux.relocs can't be found, please remove vmlinux and try again") || true
|
||||
|
||||
$(obj)/Image: vmlinux.relocs FORCE
|
||||
else
|
||||
$(obj)/Image: vmlinux FORCE
|
||||
endif
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
$(obj)/Image.gz: $(obj)/Image FORCE
|
||||
|
|
|
@ -56,4 +56,7 @@ bool kernel_page_present(struct page *page);
|
|||
#define SECTION_ALIGN L1_CACHE_BYTES
|
||||
#endif /* CONFIG_STRICT_KERNEL_RWX */
|
||||
|
||||
#define PECOFF_SECTION_ALIGNMENT 0x1000
|
||||
#define PECOFF_FILE_ALIGNMENT 0x200
|
||||
|
||||
#endif /* _ASM_RISCV_SET_MEMORY_H */
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
#include <linux/arch_topology.h>
|
||||
|
||||
/* Replace task scheduler's default frequency-invariant accounting */
|
||||
#define arch_scale_freq_tick topology_scale_freq_tick
|
||||
#define arch_set_freq_scale topology_set_freq_scale
|
||||
#define arch_scale_freq_tick topology_scale_freq_tick
|
||||
#define arch_set_freq_scale topology_set_freq_scale
|
||||
#define arch_scale_freq_capacity topology_get_freq_scale
|
||||
#define arch_scale_freq_invariant topology_scale_freq_invariant
|
||||
|
||||
|
@ -17,4 +17,5 @@
|
|||
#define arch_update_cpu_topology topology_update_cpu_topology
|
||||
|
||||
#include <asm-generic/topology.h>
|
||||
|
||||
#endif /* _ASM_RISCV_TOPOLOGY_H */
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <linux/pe.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <asm/set_memory.h>
|
||||
|
||||
.macro __EFI_PE_HEADER
|
||||
.long PE_MAGIC
|
||||
|
@ -33,7 +34,11 @@ optional_header:
|
|||
.byte 0x02 // MajorLinkerVersion
|
||||
.byte 0x14 // MinorLinkerVersion
|
||||
.long __pecoff_text_end - efi_header_end // SizeOfCode
|
||||
.long __pecoff_data_virt_size // SizeOfInitializedData
|
||||
#ifdef __clang__
|
||||
.long __pecoff_data_virt_size // SizeOfInitializedData
|
||||
#else
|
||||
.long __pecoff_data_virt_end - __pecoff_text_end // SizeOfInitializedData
|
||||
#endif
|
||||
.long 0 // SizeOfUninitializedData
|
||||
.long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
|
||||
.long efi_header_end - _start // BaseOfCode
|
||||
|
@ -91,9 +96,17 @@ section_table:
|
|||
IMAGE_SCN_MEM_EXECUTE // Characteristics
|
||||
|
||||
.ascii ".data\0\0\0"
|
||||
.long __pecoff_data_virt_size // VirtualSize
|
||||
#ifdef __clang__
|
||||
.long __pecoff_data_virt_size // VirtualSize
|
||||
#else
|
||||
.long __pecoff_data_virt_end - __pecoff_text_end // VirtualSize
|
||||
#endif
|
||||
.long __pecoff_text_end - _start // VirtualAddress
|
||||
.long __pecoff_data_raw_size // SizeOfRawData
|
||||
#ifdef __clang__
|
||||
.long __pecoff_data_raw_size // SizeOfRawData
|
||||
#else
|
||||
.long __pecoff_data_raw_end - __pecoff_text_end // SizeOfRawData
|
||||
#endif
|
||||
.long __pecoff_text_end - _start // PointerToRawData
|
||||
|
||||
.long 0 // PointerToRelocations
|
||||
|
|
|
@ -27,9 +27,6 @@ ENTRY(_start)
|
|||
|
||||
jiffies = jiffies_64;
|
||||
|
||||
PECOFF_SECTION_ALIGNMENT = 0x1000;
|
||||
PECOFF_FILE_ALIGNMENT = 0x200;
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
/* Beginning of code and text segment */
|
||||
|
@ -99,10 +96,6 @@ SECTIONS
|
|||
*(.rel.dyn*)
|
||||
}
|
||||
|
||||
.rela.dyn : {
|
||||
*(.rela*)
|
||||
}
|
||||
|
||||
__init_data_end = .;
|
||||
|
||||
. = ALIGN(8);
|
||||
|
@ -129,9 +122,27 @@ SECTIONS
|
|||
*(.sdata*)
|
||||
}
|
||||
|
||||
.rela.dyn : ALIGN(8) {
|
||||
__rela_dyn_start = .;
|
||||
*(.rela .rela*)
|
||||
__rela_dyn_end = .;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
.data.rel : { *(.data.rel*) }
|
||||
.got : { *(.got*) }
|
||||
.plt : { *(.plt) }
|
||||
.dynamic : { *(.dynamic) }
|
||||
.dynsym : { *(.dynsym) }
|
||||
.dynstr : { *(.dynstr) }
|
||||
.hash : { *(.hash) }
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EFI
|
||||
.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
|
||||
__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
|
||||
__pecoff_data_raw_end = ABSOLUTE(.);
|
||||
#endif
|
||||
|
||||
/* End of data section */
|
||||
|
@ -142,6 +153,7 @@ SECTIONS
|
|||
#ifdef CONFIG_EFI
|
||||
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
|
||||
__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
|
||||
__pecoff_data_virt_end = ABSOLUTE(.);
|
||||
#endif
|
||||
_end = .;
|
||||
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
CFLAGS_init.o := -mcmodel=medany
|
||||
ifdef CONFIG_RELOCATABLE
|
||||
CFLAGS_init.o += -fno-pie
|
||||
endif
|
||||
|
||||
ifdef CONFIG_FTRACE
|
||||
CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
|
||||
CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
#include <linux/elf.h>
|
||||
#endif
|
||||
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
|
|||
print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||
#endif
|
||||
|
||||
print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
|
||||
print_ml("kernel", (unsigned long)kernel_map.virt_addr,
|
||||
(unsigned long)ADDRESS_SPACE_END);
|
||||
}
|
||||
}
|
||||
|
@ -831,6 +834,44 @@ static __init void set_satp_mode(void)
|
|||
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
extern unsigned long __rela_dyn_start, __rela_dyn_end;
|
||||
|
||||
/*
 * relocate_kernel() - apply the relative relocations found in .rela.dyn.
 *
 * Walks the Elf64_Rela records between __rela_dyn_start and __rela_dyn_end.
 * Records whose r_info is not R_RISCV_RELATIVE are skipped. For every other
 * record, r_addend is stored at the location described by r_offset; the
 * stored value is shifted by the relocation offset only when it is at or
 * above KERNEL_LINK_ADDR.
 */
static void __init relocate_kernel(void)
|
||||
{
|
||||
Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
|
||||
/*
|
||||
* This holds the offset between the linked virtual address and the
|
||||
* relocated virtual address.
|
||||
*/
|
||||
uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
|
||||
/*
|
||||
* This holds the offset between kernel linked virtual address and
|
||||
* physical address.
|
||||
*/
|
||||
uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
|
||||
|
||||
for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
|
||||
/*
 * r_offset is a link-time virtual address. Subtracting
 * (KERNEL_LINK_ADDR - kernel_map.phys_addr) turns it into the matching
 * physical address, which is where the store below lands.
 * NOTE(review): presumably needed because this runs before the kernel
 * mapping is live - confirm against the setup_vm() call site.
 */
Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
|
||||
Elf64_Addr relocated_addr = rela->r_addend;
|
||||
|
||||
/*
 * NOTE(review): the whole r_info word is compared, not just its type
 * field, so only records with a zero symbol index can match -
 * presumably always the case for R_RISCV_RELATIVE; confirm.
 */
if (rela->r_info != R_RISCV_RELATIVE)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Make sure to not relocate vdso symbols like rt_sigreturn
|
||||
* which are linked from the address 0 in vmlinux since
|
||||
* vdso symbol addresses are actually used as an offset from
|
||||
* mm->context.vdso in VDSO_OFFSET macro.
|
||||
*/
|
||||
if (relocated_addr >= KERNEL_LINK_ADDR)
|
||||
relocated_addr += reloc_offset;
|
||||
|
||||
/* Patch the relocation target with the final value. */
*(Elf64_Addr *)addr = relocated_addr;
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_RELOCATABLE */
|
||||
|
||||
#ifdef CONFIG_XIP_KERNEL
|
||||
static void __init create_kernel_page_table(pgd_t *pgdir,
|
||||
__always_unused bool early)
|
||||
|
@ -1029,6 +1070,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
|
|||
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RELOCATABLE
|
||||
/*
|
||||
* Early page table uses only one PUD, which makes it possible
|
||||
* to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
|
||||
* makes the kernel cross over a PUD_SIZE boundary, raise a bug
|
||||
* since a part of the kernel would not get mapped.
|
||||
*/
|
||||
BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
|
||||
relocate_kernel();
|
||||
#endif
|
||||
|
||||
apply_early_boot_alternatives();
|
||||
pt_ops_set_early();
|
||||
|
||||
|
|
26
arch/riscv/tools/relocs_check.sh
Executable file
26
arch/riscv/tools/relocs_check.sh
Executable file
|
@ -0,0 +1,26 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
# Based on powerpc relocs_check.sh
|
||||
|
||||
# This script checks the relocations of a vmlinux for "suspicious"
|
||||
# relocations.
#
# Usage: relocs_check.sh <path to objdump> <path to nm> <path to vmlinux>
# Reads ${srctree} to locate the generic scripts/relocs_check.sh helper.
# Exit status: 1 when fewer than three arguments are given; otherwise the
# script ends with status 0 even when bad relocations are reported, so the
# findings are a warning only.
|
||||
|
||||
# Refuse to run without the three required arguments.
if [ $# -lt 3 ]; then
|
||||
echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Forward all arguments to the generic helper, then drop the lines that
# mention the one relocation type accepted here.
bad_relocs=$(
|
||||
${srctree}/scripts/relocs_check.sh "$@" |
|
||||
# These relocations are okay
|
||||
# R_RISCV_RELATIVE
|
||||
grep -F -w -v 'R_RISCV_RELATIVE'
|
||||
)
|
||||
|
||||
# Nothing left after filtering: every relocation is acceptable.
if [ -z "$bad_relocs" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Report how many lines remain, followed by the lines themselves.
num_bad=$(echo "$bad_relocs" | wc -l)
|
||||
echo "WARNING: $num_bad bad relocations"
|
||||
echo "$bad_relocs"
|
20
scripts/relocs_check.sh
Executable file
20
scripts/relocs_check.sh
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/sh
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# Get a list of all the relocations, remove from it the relocations
|
||||
# that are known to be legitimate and return this list to arch specific
|
||||
# script that will look for suspicious relocations.
#
# Arguments: $1 = objdump, $2 = nm, $3 = vmlinux.
# The argument count is not checked here. The remaining relocation lines
# are written to stdout.
|
||||
|
||||
objdump="$1"
|
||||
nm="$2"
|
||||
vmlinux="$3"
|
||||
|
||||
# Remove from the possible bad relocations those that match an undefined
|
||||
# weak symbol which will result in an absolute relocation to 0.
|
||||
# Weak unresolved symbols are of that form in nm output:
|
||||
# " w _binary__btf_vmlinux_bin_end"
|
||||
# Such lines have no address column, so the type letter is awk's first field
# and the symbol name its second. The result is one symbol name per line.
undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
|
||||
|
||||
# Pipeline: list the relocations reported by "objdump -R", keep only the
# lines containing a word that starts with "R_", then filter out the weak
# symbols collected above.
$objdump -R "$vmlinux" |
|
||||
grep -E '\<R_' |
|
||||
# With a non-empty symbol list, drop every line containing one of the names
# as a whole word (-F fixed strings, -w word match, -v invert). With an
# empty list, pass the input through unchanged via cat.
([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
|
Loading…
Reference in a new issue