diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7888c9803eb0..1dba9c0d63d5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR be linked for and stored to. This address is dependent on your own flash usage. +config XIP_DEFLATED_DATA + bool "Store kernel .data section compressed in ROM" + depends on XIP_KERNEL + select ZLIB_INFLATE + help + Before the kernel is actually executed, its .data section has to be + copied to RAM from ROM. This option allows for storing that data + in compressed form and decompressed to RAM rather than merely being + copied, saving some precious ROM space. A possible drawback is a + slightly longer boot delay. + config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 50f8d1be7fcb..a3af4dc08c3e 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage ifeq ($(CONFIG_XIP_KERNEL),y) +cmd_deflate_xip_data = $(CONFIG_SHELL) -c \ + '$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }' + +ifeq ($(CONFIG_XIP_DEFLATED_DATA),y) +quiet_cmd_mkxip = XIPZ $@ +cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data) +else +quiet_cmd_mkxip = $(quiet_cmd_objcopy) +cmd_mkxip = $(cmd_objcopy) +endif + $(obj)/xipImage: vmlinux FORCE - $(call if_changed,objcopy) + $(call if_changed,mkxip) @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)' $(obj)/Image $(obj)/zImage: FORCE diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh new file mode 100755 index 000000000000..1189598a25eb --- /dev/null +++ b/arch/arm/boot/deflate_xip_data.sh @@ -0,0 +1,64 @@ +#!/bin/sh + +# XIP kernel .data segment compressor +# +# Created by: Nicolas Pitre, August 2017 +# Copyright: (C) 2017 Linaro Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +# This script locates the start of the .data section in xipImage and +# substitutes it with a compressed version. The needed offsets are obtained +# from symbol addresses in vmlinux. It is expected that .data extends to +# the end of xipImage. + +set -e + +VMLINUX="$1" +XIPIMAGE="$2" + +DD="dd status=none" + +# Use "make V=1" to debug this script. +case "$KBUILD_VERBOSE" in +*1*) + set -x + ;; +esac + +sym_val() { + # extract hex value for symbol in $1 + local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}") + [ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; } + # convert from hex to decimal + echo $((0x$val)) +} + +__data_loc=$(sym_val __data_loc) +_edata_loc=$(sym_val _edata_loc) +base_offset=$(sym_val _xiprom) + +# convert to file based offsets +data_start=$(($__data_loc - $base_offset)) +data_end=$(($_edata_loc - $base_offset)) + +# Make sure data occupies the last part of the file. +file_end=$(stat -c "%s" "$XIPIMAGE") +if [ "$file_end" != "$data_end" ]; then + printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \ + $(($file_end + $base_offset)) $_edata_loc 2>&1 + exit 1; +fi + +# be ready to clean up +trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3 + +# substitute the data section by a compressed version +$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp" +$DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes | +gzip -9 >> "$XIPIMAGE.tmp" + +# replace kernel binary +mv -f "$XIPIMAGE.tmp" "$XIPIMAGE" diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ad325a8c7e1e..52f437997cc6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -87,6 +87,11 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +# This is executed very early using a temporary stack when no memory allocator +# nor global data is available. Everything has to be allocated on the stack. +CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240) +obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o + obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a ifeq ($(CONFIG_ARM_PSCI),y) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 8733012d231f..a25027b87a60 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -79,47 +79,68 @@ ENDPROC(__vet_atags) */ __INIT __mmap_switched: - adr r3, __mmap_switched_data - ldmia r3!, {r4, r5, r6, r7} - cmp r4, r5 @ Copy data segment if needed -1: cmpne r5, r6 - ldrne fp, [r4], #4 - strne fp, [r5], #4 - bne 1b + mov r7, r1 + mov r8, r2 + mov r10, r0 - mov fp, #0 @ Clear BSS (and zero fp) -1: cmp r6, r7 - strcc fp, [r6],#4 - bcc 1b + adr r4, __mmap_switched_data + mov fp, #0 - ARM( ldmia r3, {r4, r5, r6, r7, sp}) - THUMB( ldmia r3, {r4, r5, r6, r7} ) - THUMB( ldr sp, [r3, #16] ) - str r9, [r4] @ Save processor ID - str r1, [r5] @ Save machine type - str r2, [r6] @ Save atags pointer - cmp r7, #0 - strne r0, [r7] @ Save control register values +#if defined(CONFIG_XIP_DEFLATED_DATA) + ARM( ldr sp, [r4], #4 ) + THUMB( ldr sp, [r4] ) + THUMB( add r4, #4 ) + bl __inflate_kernel_data @ decompress .data to RAM + teq r0, #0 + bne __error +#elif defined(CONFIG_XIP_KERNEL) + ARM( ldmia r4!, {r0, r1, r2, sp} ) + THUMB( ldmia r4!, {r0, r1, r2, r3} ) + THUMB( mov sp, r3 ) + sub r2, r2, r1 + bl memcpy @ copy .data to RAM +#endif + + ARM( ldmia r4!, {r0, r1, sp} ) + THUMB( ldmia r4!, {r0, r1, r3} ) + THUMB( mov sp, r3 ) + sub r1, r1, r0 + bl __memzero @ clear .bss + + ldmia r4, {r0, r1, r2, r3} + str r9, [r0] @ Save processor ID + str r7, [r1] @ Save machine type + str r8, [r2] @ Save atags pointer + cmp r3, #0 + strne r10, [r3] @ Save control register values b start_kernel ENDPROC(__mmap_switched) .align 2 .type __mmap_switched_data, %object __mmap_switched_data: - .long __data_loc @ r4 - .long _sdata @ r5 - .long __bss_start @ r6 - .long _end @ r7 - .long processor_id @ r4 - .long __machine_arch_type @ r5 - .long __atags_pointer @ r6 -#ifdef CONFIG_CPU_CP15 - .long cr_alignment @ r7 -#else - .long 0 @ r7 +#ifdef CONFIG_XIP_KERNEL +#ifndef CONFIG_XIP_DEFLATED_DATA + .long _sdata @ r0 + .long __data_loc @ r1 + .long _edata_loc @ r2 #endif + .long __bss_stop @ sp (temporary stack in .bss) +#endif + + .long __bss_start @ r0 + .long __bss_stop @ r1 .long init_thread_union + THREAD_START_SP @ sp + + .long processor_id @ r0 + .long __machine_arch_type @ r1 + .long __atags_pointer @ r2 +#ifdef CONFIG_CPU_CP15 + .long cr_alignment @ r3 +#else + .long 0 @ r3 +#endif .size __mmap_switched_data, . - __mmap_switched_data /* diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c new file mode 100644 index 000000000000..6dd0ce5e6058 --- /dev/null +++ b/arch/arm/kernel/head-inflate-data.c @@ -0,0 +1,62 @@ +/* + * XIP kernel .data segment decompressor + * + * Created by: Nicolas Pitre, August 2017 + * Copyright: (C) 2017 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +/* for struct inflate_state */ +#include "../../../lib/zlib_inflate/inftrees.h" +#include "../../../lib/zlib_inflate/inflate.h" +#include "../../../lib/zlib_inflate/infutil.h" + +extern char __data_loc[]; +extern char _edata_loc[]; +extern char _sdata[]; + +/* + * This code is called very early during the boot process to decompress + * the .data segment stored compressed in ROM. Therefore none of the global + * variables are valid yet, hence no kernel services such as memory + * allocation is available. Everything must be allocated on the stack and + * we must avoid any global data access. We use a temporary stack located + * in the .bss area. The linker script makes sure the .bss is big enough + * to hold our stack frame plus some room for called functions. + * + * We mimic the code in lib/decompress_inflate.c to use the smallest work + * area possible. And because everything is statically allocated on the + * stack then there is no need to clean up before returning. + */ + +int __init __inflate_kernel_data(void) +{ + struct z_stream_s stream, *strm = &stream; + struct inflate_state state; + char *in = __data_loc; + int rc; + + /* Check and skip gzip header (assume no filename) */ + if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3) + return -1; + in += 10; + + strm->workspace = &state; + strm->next_in = in; + strm->avail_in = _edata_loc - __data_loc; /* upper bound */ + strm->next_out = _sdata; + strm->avail_out = _edata_loc - __data_loc; + zlib_inflateInit2(strm, -MAX_WBITS); + WS(strm)->inflate_state.wsize = 0; + WS(strm)->inflate_state.window = NULL; + rc = zlib_inflate(strm, Z_FINISH); + if (rc == Z_OK || rc == Z_STREAM_END) + rc = strm->avail_out; /* should be 0 */ + return rc; +} diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8265b116218d..7a844310085e 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -77,9 +77,7 @@ SECTIONS *(.text.fixup) *(__ex_table) #endif -#ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) -#endif *(.discard) *(.discard.*) } @@ -181,19 +179,7 @@ SECTIONS *(.taglist.init) __tagtable_end = .; } -#ifdef CONFIG_SMP_ON_UP - .init.smpalt : { - __smpalt_begin = .; - *(.alt.smp.init) - __smpalt_end = .; - } -#endif - .init.pv_table : { - __pv_table_begin = .; - *(.pv_table) - __pv_table_end = .; - } - .init.data : { + .init.rodata : { INIT_SETUP(16) INIT_CALLS CON_INITCALL @@ -201,48 +187,46 @@ SECTIONS INIT_RAM_FS } + _exiprom = .; /* End of XIP ROM area */ + +/* + * From this point, stuff is considered writable and will be copied to RAM + */ + __data_loc = ALIGN(4); /* location in file */ + . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */ +#undef LOAD_OFFSET +#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc) + + . = ALIGN(THREAD_SIZE); + _sdata = .; + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { + *(.data..ro_after_init) + } + _edata = .; + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + ARM_EXIT_KEEP(EXIT_DATA) + } #ifdef CONFIG_SMP PERCPU_SECTION(L1_CACHE_BYTES) #endif - _exiprom = .; /* End of XIP ROM area */ - __data_loc = ALIGN(4); /* location in binary */ - . = PAGE_OFFSET + TEXT_OFFSET; + /* + * End of copied data. We need a dummy section to get its LMA. + * Also located before final ALIGN() as trailing padding is not stored + * in the resulting binary file and useless to copy. + */ + .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } + _edata_loc = LOADADDR(.data.endmark); - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - INIT_DATA - ARM_EXIT_KEEP(EXIT_DATA) - . = ALIGN(PAGE_SIZE); - __init_end = .; - - *(.data..ro_after_init) - - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) - READ_MOSTLY_DATA(L1_CACHE_BYTES) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - _edata_loc = __data_loc + SIZEOF(.data); - - BUG_TABLE + . = ALIGN(PAGE_SIZE); + __init_end = .; #ifdef CONFIG_HAVE_TCM /* @@ -301,7 +285,7 @@ SECTIONS } #endif - BSS_SECTION(0, 0, 0) + BSS_SECTION(0, 0, 8) _end = .; STABS_DEBUG @@ -322,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") */ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, "HYP init code too big or misaligned") + +#ifdef CONFIG_XIP_DEFLATED_DATA +/* + * The .bss is used as a stack area for __inflate_kernel_data() whose stack + * frame is 9568 bytes. Make sure it has extra room left. + */ +ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") +#endif diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index c83a7ba737d6..f73ba564b5e5 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -214,14 +214,9 @@ SECTIONS *(.pv_table) __pv_table_end = .; } - .init.data : { - INIT_DATA - INIT_SETUP(16) - INIT_CALLS - CON_INITCALL - SECURITY_INITCALL - INIT_RAM_FS - } + + INIT_DATA_SECTION(16) + .exit.data : { ARM_EXIT_KEEP(EXIT_DATA) } @@ -236,33 +231,10 @@ SECTIONS . = ALIGN(THREAD_SIZE); #endif __init_end = .; - __data_loc = .; - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) - READ_MOSTLY_DATA(L1_CACHE_BYTES) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - _edata_loc = __data_loc + SIZEOF(.data); - - BUG_TABLE + _sdata = .; + RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + _edata = .; #ifdef CONFIG_HAVE_TCM /*