Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot and percpu updates from Ingo Molnar:
 "This tree contains a bootable images documentation update plus three
  slightly misplaced x86/asm percpu changes/optimizations"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86-64: Use RIP-relative addressing for most per-CPU accesses
  x86-64: Handle PC-relative relocations on per-CPU data
  x86: Convert a few more per-CPU items to read-mostly ones
  x86, boot: Document intermediates more clearly
This commit is contained in:
Linus Torvalds 2014-12-10 12:10:24 -08:00
commit b6444bd0a1
8 changed files with 103 additions and 30 deletions

View File

@ -3,6 +3,18 @@
#
# create a compressed vmlinux image from the original vmlinux
#
# vmlinuz is:
# decompression code (*.o)
# asm globals (piggy.S), including:
# vmlinux.bin.(gz|bz2|lzma|...)
#
# vmlinux.bin is:
# vmlinux stripped of debugging and comments
# vmlinux.bin.all is:
# vmlinux.bin + vmlinux.relocs
# vmlinux.bin.(gz|bz2|lzma|...) is:
# (see scripts/Makefile.lib size_append)
# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4

View File

@ -260,7 +260,7 @@ static void handle_relocations(void *output, unsigned long output_len)
/*
* Process relocations: 32 bit relocations first then 64 bit after.
* Two sets of binary relocations are added to the end of the kernel
* Three sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
* address of the location which needs to be updated stored as a
* 32-bit value which is sign extended to 64 bits.
@ -270,6 +270,8 @@ static void handle_relocations(void *output, unsigned long output_len)
* kernel bits...
* 0 - zero terminator for 64 bit relocations
* 64 bit relocation repeated
* 0 - zero terminator for inverse 32 bit relocations
* 32 bit inverse relocation repeated
* 0 - zero terminator for 32 bit relocations
* 32 bit relocation repeated
*
@ -286,6 +288,16 @@ static void handle_relocations(void *output, unsigned long output_len)
*(uint32_t *)ptr += delta;
}
#ifdef CONFIG_X86_64
while (*--reloc) {
long extended = *reloc;
extended += map;
ptr = (unsigned long)extended;
if (ptr < min_addr || ptr > max_addr)
error("inverse 32-bit relocation outside of kernel!\n");
*(int32_t *)ptr -= delta;
}
for (reloc--; *reloc; reloc--) {
long extended = *reloc;
extended += map;

View File

@ -64,7 +64,7 @@
#define __percpu_prefix ""
#endif
#define __percpu_arg(x) __percpu_prefix "%P" #x
#define __percpu_arg(x) __percpu_prefix "%" #x
/*
* Initialized pointers to per-cpu variables needed for the boot
@ -179,29 +179,58 @@ do { \
} \
} while (0)
#define percpu_from_op(op, var, constraint) \
#define percpu_from_op(op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
: "=q" (pfo_ret__) \
: constraint); \
: "m" (var)); \
break; \
case 2: \
asm(op "w "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: constraint); \
: "m" (var)); \
break; \
case 4: \
asm(op "l "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: constraint); \
: "m" (var)); \
break; \
case 8: \
asm(op "q "__percpu_arg(1)",%0" \
: "=r" (pfo_ret__) \
: constraint); \
: "m" (var)); \
break; \
default: __bad_percpu_size(); \
} \
pfo_ret__; \
})
#define percpu_stable_op(op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(P1)",%0" \
: "=q" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 2: \
asm(op "w "__percpu_arg(P1)",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 4: \
asm(op "l "__percpu_arg(P1)",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 8: \
asm(op "q "__percpu_arg(P1)",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
default: __bad_percpu_size(); \
} \
@ -359,11 +388,11 @@ do { \
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
@ -381,9 +410,9 @@ do { \
#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp)
#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp)
#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp)
#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
@ -435,7 +464,7 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp)
#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@ -444,7 +473,7 @@ do { \
#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp)
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
@ -522,7 +551,7 @@ static inline int x86_this_cpu_variable_test_bit(int nr,
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU(unsigned long, this_cpu_off);
DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
#endif /* !__ASSEMBLY__ */

View File

@ -127,7 +127,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
};
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
@ -151,7 +151,7 @@ extern __u32 cpu_caps_cleared[NCAPINTS];
extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
#else
#define cpu_info boot_cpu_data

View File

@ -30,7 +30,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
#define BOOT_PERCPU_OFFSET 0
#endif
DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {

View File

@ -99,7 +99,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;

View File

@ -186,6 +186,8 @@ SECTIONS
* start another segment - init.
*/
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif
INIT_TEXT_SECTION(PAGE_SIZE)

View File

@ -20,7 +20,10 @@ struct relocs {
static struct relocs relocs16;
static struct relocs relocs32;
#if ELF_BITS == 64
static struct relocs relocs32neg;
static struct relocs relocs64;
#endif
struct section {
Elf_Shdr shdr;
@ -762,11 +765,16 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
switch (r_type) {
case R_X86_64_NONE:
/* NONE can be ignored. */
break;
case R_X86_64_PC32:
/*
* NONE can be ignored and PC relative relocations don't
* need to be adjusted.
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
break;
case R_X86_64_32:
@ -986,7 +994,10 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Order the relocations for more efficient processing */
sort_relocs(&relocs16);
sort_relocs(&relocs32);
#if ELF_BITS == 64
sort_relocs(&relocs32neg);
sort_relocs(&relocs64);
#endif
/* Print the relocations */
if (as_text) {
@ -1007,14 +1018,21 @@ static void emit_relocs(int as_text, int use_real_mode)
for (i = 0; i < relocs32.count; i++)
write_reloc(relocs32.offset[i], stdout);
} else {
if (ELF_BITS == 64) {
/* Print a stop */
write_reloc(0, stdout);
#if ELF_BITS == 64
/* Print a stop */
write_reloc(0, stdout);
/* Now print each relocation */
for (i = 0; i < relocs64.count; i++)
write_reloc(relocs64.offset[i], stdout);
}
/* Now print each relocation */
for (i = 0; i < relocs64.count; i++)
write_reloc(relocs64.offset[i], stdout);
/* Print a stop */
write_reloc(0, stdout);
/* Now print each inverse 32-bit relocation */
for (i = 0; i < relocs32neg.count; i++)
write_reloc(relocs32neg.offset[i], stdout);
#endif
/* Print a stop */
write_reloc(0, stdout);