diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c941b214e0b7..4150f74c521a 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -64,6 +64,7 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - arm64 - arm32 - ppc64 + - ppc32 - sparc64 - mips64 - s390x @@ -73,7 +74,6 @@ two flavors of JITs, the newer eBPF JIT currently supported on: And the older cBPF JIT supported on the following archs: - mips - - ppc - sparc eBPF JITs are a superset of cBPF JITs, meaning the kernel will diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index 7aff505af706..fa83403b4aec 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -21,7 +21,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | TODO | + | powerpc: | ok | | riscv: | ok | | s390: | ok | | sh: | TODO | diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst index 3d553e8a2937..fce8bc793660 100644 --- a/Documentation/powerpc/papr_hcalls.rst +++ b/Documentation/powerpc/papr_hcalls.rst @@ -275,6 +275,20 @@ Health Bitmap Flags: Given a DRC Index collect the performance statistics for NVDIMM and copy them to the resultBuffer. +**H_SCM_FLUSH** + +| Input: *drcIndex, continue-token* +| Out: *continue-token* +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY* + +Given a DRC Index, flush the data to the backend NVDIMM device. + +The hcall returns H_BUSY when the flush takes a long time and the hcall needs +to be issued multiple times in order to be completely serviced. The +*continue-token* from the output is to be passed in the argument list of +subsequent hcalls to the hypervisor until the hcall is completely serviced, +at which point H_SUCCESS or another error is returned by the hypervisor. + References ========== .. [1] "Power Architecture Platform Reference" diff --git a/Documentation/powerpc/vas-api.rst b/Documentation/powerpc/vas-api.rst index 90c50ed839f3..bdb50fed903e 100644 --- a/Documentation/powerpc/vas-api.rst +++ b/Documentation/powerpc/vas-api.rst @@ -254,7 +254,7 @@ using this window. the signal will be issued to the thread group leader signals. NX-GZIP User's Manual: -https://github.com/libnxz/power-gzip/blob/master/power_nx_gzip_um.pdf +https://github.com/libnxz/power-gzip/blob/master/doc/power_nx_gzip_um.pdf Simple example ============== @@ -301,5 +301,5 @@ Simple example close(fd) or window can be closed upon process exit } - Refer https://github.com/abalib/power-gzip for tests or more + Refer https://github.com/libnxz/power-gzip for tests or more use cases.
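Editorial sketch, not part of the patch: the H_SCM_FLUSH description above implies a simple retry loop on the caller's side. Assuming the existing pseries plpar_hcall() wrapper and purely hypothetical names, a caller might drive the hcall roughly as follows (error handling is illustrative only):

	/* Hypothetical illustration of the H_SCM_FLUSH retry protocol. */
	static int example_scm_flush(u32 drc_index)
	{
		unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
		u64 token = 0;
		long rc;

		do {
			rc = plpar_hcall(H_SCM_FLUSH, ret_buf, drc_index, token);
			token = ret_buf[0];	/* continue-token for the next attempt */
		} while (rc == H_BUSY);

		return rc == H_SUCCESS ? 0 : -EIO;
	}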
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 7508b0ac1d21..ecb6fd4c3c64 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -155,7 +155,8 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { const struct vdso_data *ret; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 4b4c0dac0e14..4f7a629df81f 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -96,7 +96,7 @@ const struct vdso_data *__arch_get_vdso_data(void) #ifdef CONFIG_TIME_NS static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(void) +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3b34c44832e0..c52b0a42062a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -119,6 +119,7 @@ config PPC # select ARCH_32BIT_OFF_T if PPC32 select ARCH_HAS_DEBUG_VIRTUAL + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -135,7 +136,7 @@ config PPC select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_COPY_MC if PPC64 @@ -145,6 +146,7 @@ config PPC select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 select ARCH_USE_BUILTIN_BSWAP @@ -171,6 +173,7 @@ config PPC select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP + select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI @@ -178,13 +181,15 @@ config PPC select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL - select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB + select HAVE_ARCH_KFENCE if PPC32 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_NVRAM_OPS @@ -192,7 +197,6 @@ config PPC select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS select HAVE_C_RECORDMCOUNT - select HAVE_CBPF_JIT if !PPC64 select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if 
PPC64 @@ -200,7 +204,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_EBPF_JIT if PPC64 + select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD @@ -224,8 +228,8 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) - select HAVE_OPTPROBES if PPC64 + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP + select HAVE_OPTPROBES select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH @@ -234,7 +238,7 @@ config PPC select MMU_GATHER_RCU_TABLE_FREE select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select HAVE_RELIABLE_STACKTRACE select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING @@ -786,7 +790,7 @@ config THREAD_SHIFT config DATA_SHIFT_BOOL bool "Set custom data alignment" depends on ADVANCED_OPTIONS - depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC + depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) help This option allows you to set the kernel data alignment. When @@ -798,13 +802,13 @@ config DATA_SHIFT_BOOL config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 - range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC) && PPC_8xx + range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 + range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 - default 18 if DEBUG_PAGEALLOC && PPC_BOOK3S_32 + default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 default 23 if STRICT_KERNEL_RWX && PPC_8xx - default 23 if DEBUG_PAGEALLOC && PPC_8xx && PIN_TLB_DATA - default 19 if DEBUG_PAGEALLOC && PPC_8xx + default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA + default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx default PPC_PAGE_SHIFT help On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. @@ -1217,7 +1221,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx - default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX + default "0xb0000000" if PPC_BOOK3S_32 default "0xc0000000" endmenu diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index ae084357994e..6342f9da4545 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -353,6 +353,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION + depends on PCI || IBMVIO help Provide fault-injection capability for IOMMU. Each device can be selectively enabled via the fail_iommu property. 
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5f8544cf724a..3212d076ac6a 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -181,12 +181,6 @@ CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL CC_FLAGS_FTRACE += -mprofile-kernel endif -# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 -ifndef CONFIG_CC_IS_CLANG -CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) -endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) @@ -444,12 +438,15 @@ endif endif ifdef CONFIG_SMP +ifdef CONFIG_PPC32 prepare: task_cpu_prepare PHONY += task_cpu_prepare task_cpu_prepare: prepare0 $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TASK_CPU") print $$3;}' include/generated/asm-offsets.h)) -endif + +endif # CONFIG_PPC32 +endif # CONFIG_SMP PHONY += checkbin # Check toolchain versions: diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 4f05a6652478..701811c91a6f 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -50,6 +50,7 @@ CONFIG_PPC_TRANSACTIONAL_MEM=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y +CONFIG_FA_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y @@ -177,6 +178,7 @@ CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_IBMVETH=m CONFIG_EHEA=m +CONFIG_IBMVNIC=m CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 777221775c83..50168dde4ea5 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -41,6 +41,7 @@ CONFIG_DTL=y CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y +CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y # CONFIG_PPC_PMAC is not set CONFIG_RTAS_FLASH=m @@ -159,6 +160,7 @@ CONFIG_BE2NET=m CONFIG_S2IO=m CONFIG_IBMVETH=y CONFIG_EHEA=y +CONFIG_IBMVNIC=y CONFIG_E100=y CONFIG_E1000=y CONFIG_E1000E=y diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index e1f9b4ea1c53..bcf95ce0964f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table_32.h generated-y += syscall_table_64.h -generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h generic-y += kvm_types.h diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 939f3c94c8f3..1c7b75834e04 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -77,8 +77,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); long sys_switch_endian(void); -notrace unsigned int __check_irq_replay(void); -void notrace restore_interrupts(void); /* prom_init (OpenFirmware) */ unsigned long __init prom_init(unsigned long r3, unsigned long r4, diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index aecfde829d5d..7ae29cfb06c0 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -80,22 +80,6 @@ do { \ ___p1; \ }) -#ifdef CONFIG_PPC64 -#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ - typeof(ptr) __PTR = (ptr); \ - 
__unqual_scalar_typeof(*ptr) VAL; \ - VAL = READ_ONCE(*__PTR); \ - if (unlikely(!(cond_expr))) { \ - spin_begin(); \ - do { \ - VAL = READ_ONCE(*__PTR); \ - } while (!(cond_expr)); \ - spin_end(); \ - } \ - (typeof(*ptr))VAL; \ -}) -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 73bc5d2c431d..1670dfe9d4f1 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -5,86 +5,7 @@ #include #include -#ifdef __ASSEMBLY__ - -.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - bdnz 101b - isync -.endm - -.macro kuep_lock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_NX@h /* set Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -.macro kuep_unlock gpr1, gpr2 -#ifdef CONFIG_PPC_KUEP - li \gpr1, NUM_USER_SEGMENTS - li \gpr2, 0 - mtctr \gpr1 - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ - kuep_update_sr \gpr1, \gpr2 -#endif -.endm - -#ifdef CONFIG_PPC_KUAP - -.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ -101: mtsrin \gpr1, \gpr2 - addi \gpr1, \gpr1, 0x111 /* next VSID */ - rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ - addis \gpr2, \gpr2, 0x1000 /* address of next segment */ - cmplw \gpr2, \gpr3 - blt- 101b - isync -.endm - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lwz \gpr2, KUAP(\thread) - rlwinm. \gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, STACK_REGS_KUAP(\sp) - beq+ 102f - li \gpr1, 0 - stw \gpr1, KUAP(\thread) - mfsrin \gpr1, \gpr2 - oris \gpr1, \gpr1, SR_KS@h /* set Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr2, STACK_REGS_KUAP(\sp) - rlwinm. 
\gpr3, \gpr2, 28, 0xf0000000 - stw \gpr2, THREAD + KUAP(\current) - beq+ 102f - mfsrin \gpr1, \gpr2 - rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ - kuap_update_sr \gpr1, \gpr2, \gpr3 -102: -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr, THREAD + KUAP(\current) -999: twnei \gpr, 0 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#endif /* CONFIG_PPC_KUAP */ - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_KUAP @@ -103,6 +24,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsr() */ } +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; + u32 addr = kuap & 0xf0000000; + u32 end = kuap << 28; + + regs->kuap = kuap; + if (unlikely(!kuap)) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + u32 addr = regs->kuap & 0xf0000000; + u32 end = regs->kuap << 28; + + current->thread.kuap = regs->kuap; + + if (unlikely(regs->kuap == kuap)) + return; + + kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = current->thread.kuap; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + kuap_get_and_assert_locked(); +} + static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 415ae29fa73a..83c65845a1a9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -194,10 +194,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif -#ifdef CONFIG_STRICT_KERNEL_RWX #define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) #define MODULES_VADDR (MODULES_END - SZ_256M) -#endif #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index d941c06d4f2e..ba1743c52b56 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -79,4 +79,4 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) flush_tlb_mm(mm); } -#endif /* _ASM_POWERPC_TLBFLUSH_H */ +#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 8bd905050896..9700da3a4093 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, */ } -static inline unsigned long kuap_get_and_check_amr(void) +static inline unsigned long kuap_get_and_assert_locked(void) { if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { unsigned long amr = mfspr(SPRN_AMR); @@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0; } -#else /* CONFIG_PPC_PKEY */ - -static inline void kuap_user_restore(struct pt_regs *regs) -{ -} - -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline unsigned 
long kuap_get_and_check_amr(void) -{ - return 0; -} - -#endif /* CONFIG_PPC_PKEY */ - - -#ifdef CONFIG_PPC_KUAP - -static inline void kuap_check_amr(void) +static inline void kuap_assert_locked(void) { if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f911bdb68d8b..3004f3323144 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -18,7 +18,6 @@ * complete pgtable.h but only a portion of it. */ #include -#include #include #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 058601efbc8a..a666d561b44d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #endif /* @@ -116,6 +117,7 @@ */ #define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) +#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) /* @@ -323,7 +325,8 @@ extern unsigned long pci_io_base; #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_IO_END) +#define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index c7813dc628fc..59cab558e2f0 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -222,8 +222,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. + * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. 
*/ } diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d1635ffbb179..0b2162890d8b 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,11 +111,8 @@ #ifndef __ASSEMBLY__ struct pt_regs; -long do_page_fault(struct pt_regs *); -long hash__do_page_fault(struct pt_regs *); +void hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); -void __bad_page_fault(struct pt_regs *regs, int sig); -void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index f63495109f63..7564dd4fd12b 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,7 +30,19 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) #endif /* CONFIG_PPC_BOOK3S_64 */ #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 -extern void flush_dcache_page(struct page *page); +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +static inline void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} void flush_icache_range(unsigned long start, unsigned long stop); #define flush_icache_range flush_icache_range @@ -40,7 +52,6 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, #define flush_icache_user_page flush_icache_user_page void flush_dcache_icache_page(struct page *page); -void __flush_dcache_icache(void *page); /** * flush_dcache_range(): Write any modified data cache blocks out to memory and diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 2211b934ecb4..bda45788cfcc 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -594,7 +594,7 @@ typedef struct fcc_enet { uint fen_p256c; /* Total packets 256 < bytes <= 511 */ uint fen_p512c; /* Total packets 512 < bytes <= 1023 */ uint fen_p1024c; /* Total packets 1024 < bytes <= 1518 */ - uint fen_cambuf; /* Internal CAM buffer poiner */ + uint fen_cambuf; /* Internal CAM buffer pointer */ ushort fen_rfthr; /* Received frames threshold */ ushort fen_rfcnt; /* Received frames count */ } fcc_enet_t; diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 8d03c16a3663..947b5b9c4424 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -23,12 +23,17 @@ #include #endif +#ifdef CONFIG_PPC64 +#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE) +#else +#define FIXADDR_SIZE 0 #ifdef CONFIG_KASAN #include #define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) #else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) #endif +#endif /* * Here we define all the compile-time 'special' virtual @@ -50,6 +55,7 @@ */ enum fixed_addresses { FIX_HOLE, +#ifdef CONFIG_PPC32 /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1, @@ -72,6 +78,7 @@ enum fixed_addresses { FIX_IMMR_SIZE, #endif /* 
FIX_PCIE_MCFG, */ +#endif /* CONFIG_PPC32 */ __end_of_permanent_fixed_addresses, #define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) @@ -98,6 +105,8 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE); + if (__builtin_constant_p(idx)) BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index e93ee3202e4c..b3001f8b2c1e 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -33,9 +33,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); switch (op) { case FUTEX_OP_SET: @@ -56,10 +55,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, default: ret = -ENOSYS; } + user_access_end(); *oval = oldval; - prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -70,11 +69,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, int ret = 0; u32 prev; - if (!access_ok(uaddr, sizeof(u32))) + if (!user_access_begin(uaddr, sizeof(u32))) return -EFAULT; - allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); - __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -93,8 +90,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); + user_access_end(); + *uval = prev; - prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index ed6086d57b22..443050906018 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -315,7 +315,8 @@ #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 #define H_RPT_INVALIDATE 0x448 -#define MAX_HCALL_OPCODE H_RPT_INVALIDATE +#define H_SCM_FLUSH 0x44C +#define MAX_HCALL_OPCODE H_SCM_FLUSH /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -389,6 +390,7 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 +#define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h index 999ed5ac9053..ccb2034506f0 100644 --- a/arch/powerpc/include/asm/hvconsole.h +++ b/arch/powerpc/include/asm/hvconsole.h @@ -24,5 +24,8 @@ extern int hvc_get_chars(uint32_t vtermno, char *buf, int count); extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count); +/* Provided by HVC VIO */ +void hvc_vio_init_early(void); + #endif /* __KERNEL__ */ #endif /* _PPC64_HVCONSOLE_H */ diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index ae02eb53d6ef..d024447283a0 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -94,8 +94,6 @@ extern volatile struct Hydra __iomem *Hydra; #define HYDRA_INT_EXT7 18 /* Power Off Request */ #define HYDRA_INT_SPARE 19 -extern 
int hydra_init(void); - #endif /* __KERNEL__ */ #endif /* _ASMPPC_HYDRA_H */ diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index cc73c1267572..268d3bd073c8 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -4,6 +4,40 @@ #include +#ifdef CONFIG_PPC64 + +#define ___get_user_instr(gu_op, dest, ptr) \ +({ \ + long __gui_ret = 0; \ + unsigned long __gui_ptr = (unsigned long)ptr; \ + struct ppc_inst __gui_inst; \ + unsigned int __prefix, __suffix; \ + __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + if (__gui_ret == 0) { \ + if ((__prefix >> 26) == OP_PREFIX) { \ + __gui_ret = gu_op(__suffix, \ + (unsigned int __user *)__gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, \ + __suffix); \ + } else { \ + __gui_inst = ppc_inst(__prefix); \ + } \ + if (__gui_ret == 0) \ + (dest) = __gui_inst; \ + } \ + __gui_ret; \ +}) +#else /* !CONFIG_PPC64 */ +#define ___get_user_instr(gu_op, dest, ptr) \ + gu_op((dest).val, (u32 __user *)(ptr)) +#endif /* CONFIG_PPC64 */ + +#define get_user_instr(x, ptr) \ + ___get_user_instr(get_user, x, ptr) + +#define __get_user_instr(x, ptr) \ + ___get_user_instr(__get_user, x, ptr) + /* * Instruction data type for POWER */ @@ -68,6 +102,8 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) #define ppc_inst(x) ((struct ppc_inst){ .val = x }) +#define ppc_inst_prefix(x, y) ppc_inst(x) + static inline bool ppc_inst_prefixed(struct ppc_inst x) { return false; @@ -113,13 +149,14 @@ static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *va return location + ppc_inst_len(tmp); } -static inline u64 ppc_inst_as_u64(struct ppc_inst x) +static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) { -#ifdef CONFIG_CPU_LITTLE_ENDIAN - return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); -#else - return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + return ppc_inst_val(x); + else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x); + else + return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); } #define PPC_INST_STR_LEN sizeof("00000000 00000000") @@ -141,10 +178,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip); - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index e8d09a841373..44cde2e129b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -2,6 +2,70 @@ #ifndef _ASM_POWERPC_INTERRUPT_H #define _ASM_POWERPC_INTERRUPT_H +/* BookE/4xx */ +#define INTERRUPT_CRITICAL_INPUT 0x100 + +/* BookE */ +#define INTERRUPT_DEBUG 0xd00 +#ifdef CONFIG_BOOKE +#define INTERRUPT_PERFMON 0x260 +#define INTERRUPT_DOORBELL 0x280 +#endif + +/* BookS/4xx/8xx */ +#define INTERRUPT_MACHINE_CHECK 0x200 + +/* BookS/8xx */ +#define INTERRUPT_SYSTEM_RESET 0x100 + +/* BookS */ +#define INTERRUPT_DATA_SEGMENT 0x380 +#define INTERRUPT_INST_SEGMENT 0x480 +#define INTERRUPT_TRACE 0xd00 +#define INTERRUPT_H_DATA_STORAGE 0xe00 +#define INTERRUPT_HMI 0xe60 +#define INTERRUPT_H_FAC_UNAVAIL 0xf80 +#ifdef CONFIG_PPC_BOOK3S +#define INTERRUPT_DOORBELL 0xa00 +#define INTERRUPT_PERFMON 0xf00 
+#define INTERRUPT_ALTIVEC_UNAVAIL 0xf20 +#endif + +/* BookE/BookS/4xx/8xx */ +#define INTERRUPT_DATA_STORAGE 0x300 +#define INTERRUPT_INST_STORAGE 0x400 +#define INTERRUPT_EXTERNAL 0x500 +#define INTERRUPT_ALIGNMENT 0x600 +#define INTERRUPT_PROGRAM 0x700 +#define INTERRUPT_SYSCALL 0xc00 +#define INTERRUPT_TRACE 0xd00 + +/* BookE/BookS/44x */ +#define INTERRUPT_FP_UNAVAIL 0x800 + +/* BookE/BookS/44x/8xx */ +#define INTERRUPT_DECREMENTER 0x900 + +#ifndef INTERRUPT_PERFMON +#define INTERRUPT_PERFMON 0x0 +#endif + +/* 8xx */ +#define INTERRUPT_SOFT_EMU_8xx 0x1000 +#define INTERRUPT_INST_TLB_MISS_8xx 0x1100 +#define INTERRUPT_DATA_TLB_MISS_8xx 0x1200 +#define INTERRUPT_INST_TLB_ERROR_8xx 0x1300 +#define INTERRUPT_DATA_TLB_ERROR_8xx 0x1400 +#define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00 +#define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00 + +/* 603 */ +#define INTERRUPT_INST_TLB_MISS_603 0x1000 +#define INTERRUPT_DATA_LOAD_TLB_MISS_603 0x1100 +#define INTERRUPT_DATA_STORE_TLB_MISS_603 0x1200 + +#ifndef __ASSEMBLY__ + #include #include #include @@ -9,10 +73,18 @@ #include #include -struct interrupt_state { -#ifdef CONFIG_PPC_BOOK3E_64 - enum ctx_state ctx_state; +static inline void nap_adjust_return(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_970_NAP + if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { + /* Can avoid a test-and-clear because NMIs do not call this */ + clear_thread_local_flags(_TLF_NAPPING); + regs->nip = (unsigned long)power4_idle_nap_return; + } #endif +} + +struct interrupt_state { }; static inline void booke_restore_dbcr0(void) @@ -29,10 +101,19 @@ static inline void booke_restore_dbcr0(void) static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { - /* - * Book3E reconciles irq soft mask in asm - */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC32 + if (!arch_irq_disabled_regs(regs)) + trace_hardirqs_off(); + + if (user_mode(regs)) { + kuep_lock(); + account_cpu_user_entry(); + } else { + kuap_save_and_lock(regs); + } +#endif + +#ifdef CONFIG_PPC64 if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -48,16 +129,12 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() != CONTEXT_KERNEL); } #endif -#ifdef CONFIG_PPC_BOOK3E_64 - state->ctx_state = exception_enter(); - if (user_mode(regs)) - account_cpu_user_entry(); -#endif + booke_restore_dbcr0(); } /* @@ -76,23 +153,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { -#ifdef CONFIG_PPC_BOOK3E_64 - exception_exit(state->ctx_state); -#endif - - /* - * Book3S exits to user via interrupt_exit_user_prepare(), which does - * context tracking, which is a cleaner way to handle PREEMPT=y - * and avoid context entry/exit in e.g., preempt_schedule_irq()), - * which is likely to be where the core code wants to end up. - * - * The above comment explains why we can't do the - * - * if (user_mode(regs)) - * user_exit_irqoff(); - * - * sequence here. 
- */ + if (user_mode(regs)) + kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) @@ -109,24 +171,46 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in static inline void interrupt_async_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { + /* + * Adjust at exit so the main handler sees the true NIA. This must + * come before irq_exit() because irq_exit can enable interrupts, and + * if another interrupt is taken before nap_adjust_return has run + * here, then that interrupt would return directly to idle nap return. + */ + nap_adjust_return(regs); + irq_exit(); interrupt_exit_prepare(regs, state); } struct interrupt_nmi_state { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 u8 irq_soft_mask; u8 irq_happened; -#endif u8 ftrace_enabled; #endif }; +static inline bool nmi_disables_ftrace(struct pt_regs *regs) +{ + /* Allow DEC and PMI to be traced when they are soft-NMI */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { + if (TRAP(regs) == INTERRUPT_DECREMENTER) + return false; + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (TRAP(regs) == INTERRUPT_PERFMON) + return false; + } + + return true; +} + static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state) { #ifdef CONFIG_PPC64 -#ifdef CONFIG_PPC_BOOK3S_64 state->irq_soft_mask = local_paca->irq_soft_mask; state->irq_happened = local_paca->irq_happened; @@ -139,9 +223,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_happened |= PACA_IRQ_HARD_DIS; /* Don't do any per-CPU operations until interrupt state is fixed */ -#endif - /* Allow DEC and PMI to be traced when they are soft-NMI */ - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) { + + if (nmi_disables_ftrace(regs)) { state->ftrace_enabled = this_cpu_get_ftrace_enabled(); this_cpu_set_ftrace_enabled(0); } @@ -164,17 +247,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter radix_enabled() || (mfmsr() & MSR_DR)) nmi_exit(); + /* + * nmi does not call nap_adjust_return because nmi should not create + * new work to do (must use irq_work for that). + */ + #ifdef CONFIG_PPC64 - if (TRAP(regs) != 0x900 && TRAP(regs) != 0xf00 && TRAP(regs) != 0x260) + if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S_64 /* Check we didn't change the pending interrupt mask. 
*/ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); local_paca->irq_happened = state->irq_happened; local_paca->irq_soft_mask = state->irq_soft_mask; #endif -#endif } /* @@ -387,6 +473,7 @@ DECLARE_INTERRUPT_HANDLER(SMIException); DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); DECLARE_INTERRUPT_HANDLER(unknown_exception); DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception); DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); DECLARE_INTERRUPT_HANDLER(RunModeException); DECLARE_INTERRUPT_HANDLER(single_step_exception); @@ -410,7 +497,7 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); DECLARE_INTERRUPT_HANDLER(CacheLockingException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); -DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException); DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); /* slb.c */ @@ -421,7 +508,7 @@ DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); /* fault.c */ -DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_page_fault); DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); /* process.c */ @@ -436,7 +523,7 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); -void unrecoverable_exception(struct pt_regs *regs); +void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); void replay_soft_interrupts(void); @@ -447,4 +534,6 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs) local_irq_enable(); } +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index f3f264e441a7..b2bd58830430 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -53,8 +53,6 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -void call_do_softirq(void *sp); -void call_do_irq(struct pt_regs *regs, void *sp); extern void do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 09297ec9fa52..2d5c6bec2b4f 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -20,7 +20,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -34,7 +35,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" - JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".long 1b - ., %l[l_yes] - .\n\t" + JUMP_ENTRY_TYPE "%c0 - .\n\t" ".popsection \n\t" : : "i" (&((char *)key)[branch]) : : l_yes); @@ -43,23 +45,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool return true; } -#ifdef CONFIG_PPC64 -typedef u64 jump_label_t; -#else -typedef u32 jump_label_t; -#endif - -struct jump_entry { - jump_label_t code; - jump_label_t 
target; - jump_label_t key; -}; - #else #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 1098: nop; \ .pushsection __jump_table, "aw"; \ - FTR_ENTRY_LONG 1098b, LABEL, KEY; \ + .long 1098b - ., LABEL - .; \ + FTR_ENTRY_LONG KEY; \ .popsection #endif diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 7355ed05e65e..3c478e5ef24c 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -19,7 +19,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX) +#ifdef CONFIG_MODULES #define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M) #else #define KASAN_KERN_START PAGE_OFFSET diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h new file mode 100644 index 000000000000..a9846b68c6b9 --- /dev/null +++ b/arch/powerpc/include/asm/kfence.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * powerpc KFENCE support. + * + * Copyright (C) 2020 CS GROUP France + */ + +#ifndef __ASM_POWERPC_KFENCE_H +#define __ASM_POWERPC_KFENCE_H + +#include +#include + +static inline bool arch_kfence_init_pool(void) +{ + return true; +} + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *kpte = virt_to_kpte(addr); + + if (protect) { + pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } else { + pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0); + } + + return true; +} + +#endif /* __ASM_POWERPC_KFENCE_H */ diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7ec21af49a45..ec96232529ac 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -28,15 +28,6 @@ #ifdef __ASSEMBLY__ #ifndef CONFIG_PPC_KUAP -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 -.endm - -.macro kuap_check current, gpr -.endm - .macro kuap_check_amr gpr1, gpr2 .endm @@ -55,6 +46,14 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +void kuep_lock(void); +void kuep_unlock(void); +#else +static inline void kuep_lock(void) { } +static inline void kuep_unlock(void) { } +#endif + #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else @@ -66,7 +65,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } -static inline void kuap_check_amr(void) { } +static inline void kuap_assert_locked(void) { } +static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void kuap_user_restore(struct pt_regs *regs) { } +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + return 0; +} /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 2f5f919f6cd3..c58121508157 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -258,6 +258,8 @@ extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, struct kvm_memory_slot *memslot, unsigned long *map); +extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, + unsigned long lpcr); extern void 
kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8aacd76bb702..9531b1c1b190 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -767,8 +767,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index, unsigned long avpn); long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va); + unsigned long pte_index, unsigned long avpn); long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index); long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 652ce85f9410..4bc45d3ed8b0 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -263,7 +263,7 @@ extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - unsigned long vdso_base = (unsigned long)mm->context.vdso - PAGE_SIZE; + unsigned long vdso_base = (unsigned long)mm->context.vdso; if (start <= vdso_base && vdso_base < end) mm->context.vdso = NULL; diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 17a4a616436f..295ef5639609 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -7,33 +7,41 @@ #ifdef CONFIG_PPC_KUAP -#ifdef __ASSEMBLY__ - -.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 - lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ - mfspr \gpr1, SPRN_MD_AP - mtspr SPRN_MD_AP, \gpr2 - stw \gpr1, STACK_REGS_KUAP(\sp) -.endm - -.macro kuap_restore sp, current, gpr1, gpr2, gpr3 - lwz \gpr1, STACK_REGS_KUAP(\sp) - mtspr SPRN_MD_AP, \gpr1 -.endm - -.macro kuap_check current, gpr -#ifdef CONFIG_PPC_KUAP_DEBUG - mfspr \gpr, SPRN_MD_AP - rlwinm \gpr, \gpr, 16, 0xffff -999: twnei \gpr, MD_APG_KUAP@h - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) -#endif -.endm - -#else /* !__ASSEMBLY__ */ +#ifndef __ASSEMBLY__ #include +static inline void kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_MD_AP); + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ +} + +static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + mtspr(SPRN_MD_AP, regs->kuap); +} + +static inline unsigned long kuap_get_and_assert_locked(void) +{ + unsigned long kuap = mfspr(SPRN_MD_AP); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); + + return kuap; +} + +static inline void kuap_assert_locked(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + kuap_get_and_assert_locked(); +} + static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 478249959baa..6e4faa0a9b35 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -172,6 +172,9 @@ #define mmu_linear_psize MMU_PAGE_8M +#define MODULES_VADDR (PAGE_OFFSET - SZ_256M) +#define 
MODULES_END PAGE_OFFSET + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 6cb8aa357191..57cd3892bfe0 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -6,6 +6,8 @@ * the ppc64 non-hashed page table. */ +#include + #include #include #include @@ -54,7 +56,8 @@ #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_START (ioremap_bot) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE - FIXADDR_SIZE) +#define FIXADDR_SIZE SZ_32M /* diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9986ac34b8e2..c76157237e22 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -307,7 +307,7 @@ int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data, s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size); s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr); -s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr); +s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr); s64 opal_signal_system_reset(s32 cpu); s64 opal_quiesce(u64 shutdown_type, s32 cpu); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 00e7e671bb4b..f4c3428e816b 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -43,7 +43,7 @@ struct power_pmu { u64 alt[]); void (*get_mem_data_src)(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); - void (*get_mem_weight)(u64 *weight); + void (*get_mem_weight)(u64 *weight, u64 type); unsigned long group_constraint_mask; unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); @@ -67,6 +67,12 @@ struct power_pmu { * the pmu supports extended perf regs capability */ int capabilities; + /* + * Function to check event code for values which are + * reserved. 
Function takes struct perf_event as input, + * since event code could be spread in attr.config* + */ + int (*check_attr_config)(struct perf_event *ev); }; /* diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 4eed82172e33..c6a676714f04 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,8 +41,6 @@ struct mm_struct; #ifndef __ASSEMBLY__ -#include - /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ed161ef2b3ca..ac41776661e9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -265,6 +265,7 @@ #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 +#define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 /* Prefixes */ @@ -437,6 +438,9 @@ #define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDE(t, a, b) (0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDZE(t, a) (0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a)) +#define PPC_RAW_ADDME(t, a) (0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a)) #define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -445,11 +449,14 @@ #define PPC_RAW_BLR() (PPC_INST_BLR) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) #define PPC_RAW_BCTR() (PPC_INST_BCTR) #define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) #define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) #define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) #define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) @@ -472,6 +479,10 @@ #define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_SUBFC(d, a, b) (0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFE(d, a, b) (0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUBFIC(d, a, i) (0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_SUBFZE(d, a) (0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_MULHWU(d, 
a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) @@ -484,11 +495,13 @@ #define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ANDIS(d, a, i) (0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) #define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 3dceb64fc9af..d6739d700f0a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -15,36 +15,6 @@ #define SZL (BITS_PER_LONG/8) -/* - * Stuff for accurate CPU time accounting. - * These macros handle transitions between user and system state - * in exception entry and exit and accumulate time to the - * user_time and system_time fields in the paca. - */ - -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) -#else -#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME_USER(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_USER_TIME(ptr); \ - add ra,ra,rb; /* add on to user time */ \ - PPC_STL ra, ACCOUNT_USER_TIME(ptr); \ - -#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ - MFTB(ra); /* get timebase */ \ - PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ - PPC_STL ra, ACCOUNT_STARTTIME_USER(ptr); \ - subf rb,rb,ra; /* subtract start value */ \ - PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ - add ra,ra,rb; /* add on to system time */ \ - PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ - /* * Macros for storing registers into and loading registers from * exception frames. 
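Editorial note, not part of the patch: the PPC_RAW_ADDIC/PPC_RAW_ADDZE/PPC_RAW_SUBF* helpers added above only assemble raw 32-bit instruction words, presumably for the new ppc32 eBPF JIT. A minimal hypothetical sketch of how such words are emitted into a JIT image buffer, here adding 1 to a 64-bit value kept as a high/low register pair in r3/r4:

	/* Illustrative only: carry propagates from the low word (r4) to the high word (r3). */
	static void example_emit_inc64(u32 *image, int *idx)
	{
		image[(*idx)++] = PPC_RAW_ADDIC(4, 4, 1);	/* addic r4, r4, 1 */
		image[(*idx)++] = PPC_RAW_ADDZE(3, 3);		/* addze r3, r3 */
	}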
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8acc3590c971..7bf8a15af224 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -144,15 +144,12 @@ struct thread_struct { #endif #ifdef CONFIG_PPC32 void *pgdir; /* root of page-table tree */ - unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ #ifdef CONFIG_PPC_RTAS unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif -#endif #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) unsigned long kuap; /* opened segments for user access */ #endif -#ifdef CONFIG_VMAP_STACK unsigned long srr0; unsigned long srr1; unsigned long dar; @@ -161,7 +158,7 @@ struct thread_struct { unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; #endif -#endif +#endif /* CONFIG_PPC32 */ /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS @@ -282,7 +279,6 @@ struct thread_struct { #ifdef CONFIG_PPC32 #define INIT_THREAD { \ .ksp = INIT_SP, \ - .ksp_limit = INIT_SP_LIMIT, \ .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ @@ -393,6 +389,7 @@ extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); extern unsigned long isa206_idle_insn_mayloss(unsigned long type); #ifdef CONFIG_PPC_970_NAP extern void power4_idle_nap(void); +void power4_idle_nap_return(void); #endif extern unsigned long cpuidle_disable; @@ -417,6 +414,8 @@ extern int fix_alignment(struct pt_regs *); #define NET_IP_ALIGN 0 #endif +int do_mathemu(struct pt_regs *regs); + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PROCESSOR_H */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 1499e928ea6a..9c9ab2746168 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -185,44 +185,27 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define current_pt_regs() \ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1) +/* + * The 4 low bits (0xf) are available as flags to overload the trap word, + * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK + * must cover the bits used as flags, including bit 0 which is used as the + * "norestart" bit. + */ #ifdef __powerpc64__ -#ifdef CONFIG_PPC_BOOK3S -#define TRAP_FLAGS_MASK 0x10 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) true -#define SET_FULL_REGS(regs) do { } while (0) -#else -#define TRAP_FLAGS_MASK 0x11 -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) -#endif -#define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) -#define NV_REG_POISON 0xdeadbeefdeadbeefUL +#define TRAP_FLAGS_MASK 0x1 #else /* - * We use the least-significant bit of the trap field to indicate - * whether we have saved the full set of registers, or only a - * partial set. A 1 there means the partial set. - * On 4xx we use the next bit to indicate whether the exception + * On 4xx we use bit 1 in the trap word to indicate whether the exception * is a critical exception (1 means it is). 
*/ -#define TRAP_FLAGS_MASK 0x1F -#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -#define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) +#define TRAP_FLAGS_MASK 0xf #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) -#define NV_REG_POISON 0xdeadbeef -#define CHECK_FULL_REGS(regs) \ -do { \ - if ((regs)->trap & 1) \ - printk(KERN_CRIT "%s: partial register set\n", __func__); \ -} while (0) #endif /* __powerpc64__ */ +#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) -static inline void set_trap(struct pt_regs *regs, unsigned long val) +static __always_inline void set_trap(struct pt_regs *regs, unsigned long val) { regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); } @@ -244,12 +227,12 @@ static inline bool trap_is_syscall(struct pt_regs *regs) static inline bool trap_norestart(struct pt_regs *regs) { - return regs->trap & 0x10; + return regs->trap & 0x1; } -static inline void set_trap_norestart(struct pt_regs *regs) +static __always_inline void set_trap_norestart(struct pt_regs *regs) { - regs->trap |= 0x10; + regs->trap |= 0x1; } #define arch_has_single_step() (1) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h index b752d34517b3..07318bc63e3d 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -44,20 +44,6 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) } #define queued_spin_lock queued_spin_lock -#define smp_mb__after_spinlock() smp_mb() - -static __always_inline int queued_spin_is_locked(struct qspinlock *lock) -{ - /* - * This barrier was added to simple spinlocks by commit 51d7d5205d338, - * but it should now be possible to remove it, asm arm64 has done with - * commit c6f5d02b6a0f. - */ - smp_mb(); - return atomic_read(&lock->val); -} -#define queued_spin_is_locked queued_spin_is_locked - #ifdef CONFIG_PARAVIRT_SPINLOCKS #define SPIN_THRESHOLD (1<<15) /* not tuned */ @@ -86,6 +72,13 @@ static inline void pv_spinlocks_init(void) #endif +/* + * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait, + * which was found to have performance problems if implemented with + * the preferred spin_begin()/spin_end() SMT priority pattern. Use the + * generic version instead. + */ + #include #endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index da103e92c112..7c81d3e563b2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -124,7 +124,7 @@ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? 
*/ #else -#define MSR_TM_ACTIVE(x) 0 +#define MSR_TM_ACTIVE(x) ((void)(x), 0) #endif #if defined(CONFIG_PPC_BOOK3S_64) @@ -441,6 +441,7 @@ #define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) #define LPCR_RMLS 0x1C000000 /* Implementation dependent RMO limit sel */ #define LPCR_RMLS_SH 26 +#define LPCR_HAIL ASM_CONST(0x0000000004000000) /* HV AIL (ISAv3.1) */ #define LPCR_ILE ASM_CONST(0x0000000002000000) /* !HV irqs set MSR:LE */ #define LPCR_AIL ASM_CONST(0x0000000001800000) /* Alternate interrupt location */ #define LPCR_AIL_0 ASM_CONST(0x0000000000000000) /* MMU off exception offset 0x0 */ @@ -1393,8 +1394,7 @@ static inline void mtmsr_isync(unsigned long val) : "r" ((unsigned long)(v)) \ : "memory") #endif -#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",0" : \ - : : "memory") +#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory") static inline void wrtee(unsigned long val) { diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 658448ca5b8a..9dc97d2f9d27 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -19,8 +19,8 @@ #define RTAS_UNKNOWN_SERVICE (-1) #define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ -/* Buffer size for ppc_rtas system call. */ -#define RTAS_RMOBUF_MAX (64 * 1024) +/* Memory set aside for sys_rtas to use with calls that need a work area. */ +#define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ #define RTAS_BUSY -2 /* RTAS Busy */ @@ -357,7 +357,7 @@ extern void rtas_take_timebase(void); static inline int page_is_rtas_user_buf(unsigned long pfn) { unsigned long paddr = (pfn << PAGE_SHIFT); - if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX)) + if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE)) return 1; return 0; } diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 5b862de29dff..552f325412cc 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -38,8 +38,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - smp_mb(); - return !arch_spin_value_unlocked(*lock); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } /* @@ -282,7 +281,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_relax(lock) rw_yield(lock) #define arch_write_relax(lock) rw_yield(lock) -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 7a13bc20f0a0..03b3d010cbab 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id; extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); +extern int *chip_id_lookup_table; #ifdef CONFIG_SMP @@ -121,6 +122,11 @@ static inline struct cpumask *cpu_sibling_mask(int cpu) return per_cpu(cpu_sibling_map, cpu); } +static inline struct cpumask *cpu_core_mask(int cpu) +{ + return per_cpu(cpu_core_map, cpu); +} + static inline struct cpumask *cpu_l2_cache_mask(int cpu) { return per_cpu(cpu_l2_cache_map, cpu); diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 6ec72282888d..bd75872a6334 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h 
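A short aside on the MSR_TM_ACTIVE() change at the top of the reg.h hunk above: ((void)(x), 0) is the usual comma-expression trick for keeping the argument "used" (so configs without transactional memory do not trip set-but-unused warnings) while still evaluating to a constant 0. A hedged stand-alone sketch of the idiom, with a hypothetical DEMO_FEATURE_ACTIVE() macro:

/*
 * Illustrative only: DEMO_FEATURE_ACTIVE mimics the MSR_TM_ACTIVE() pattern.
 * With the feature compiled out, the macro still consumes its argument via a
 * comma expression, so the variable counts as used, yet the whole expression
 * folds to 0 at compile time.
 */
#include <stdio.h>

#ifdef DEMO_FEATURE
#define DEMO_FEATURE_ACTIVE(x) (((x) & 0x4) != 0)
#else
#define DEMO_FEATURE_ACTIVE(x) ((void)(x), 0)
#endif

int main(void)
{
        unsigned long msr = 0;          /* would come from hardware in the kernel */

        msr |= 0x4;                     /* set the (hypothetical) "active" bit */
        if (DEMO_FEATURE_ACTIVE(msr))   /* without DEMO_FEATURE this folds to 0 */
                printf("feature active\n");
        else
                printf("feature inactive or compiled out\n");
        return 0;
}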
@@ -10,6 +10,9 @@ #include #endif +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + #ifndef CONFIG_PARAVIRT_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 386d576673a1..b4ec6c7dd72e 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -38,7 +38,6 @@ #ifndef __ASSEMBLY__ #include #include -#include #include #define SLB_PRELOAD_NR 16U @@ -152,6 +151,12 @@ void arch_setup_new_exec(void); #ifndef __ASSEMBLY__ +static inline void clear_thread_local_flags(unsigned int flags) +{ + struct thread_info *ti = current_thread_info(); + ti->local_flags &= ~flags; +} + static inline bool test_thread_local_flags(unsigned int flags) { struct thread_info *ti = current_thread_info(); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 3beeb030cd78..e4db64c0e184 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -126,7 +126,7 @@ static inline int cpu_to_coregroup_id(int cpu) #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_cpumask(cpu) (cpu_cpu_mask(cpu)) +#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 78e2a3990eab..a09e4240c5b1 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -43,129 +43,39 @@ static inline bool __access_ok(unsigned long addr, unsigned long size) * exception handling means that it's no longer "just"...) 
* */ -#define get_user(x, ptr) \ - __get_user_check((x), (ptr), sizeof(*(ptr))) -#define put_user(x, ptr) \ - __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) -#define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#define __get_user_allowed(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) - -#define __get_user_inatomic(x, ptr) \ - __get_user_nosleep((x), (ptr), sizeof(*(ptr))) -#define __put_user_inatomic(x, ptr) \ - __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) - -#ifdef CONFIG_PPC64 - -#define ___get_user_instr(gu_op, dest, ptr) \ -({ \ - long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ - struct ppc_inst __gui_inst; \ - unsigned int __prefix, __suffix; \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ - if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ - } else { \ - __gui_inst = ppc_inst(__prefix); \ - } \ - if (__gui_ret == 0) \ - (dest) = __gui_inst; \ - } \ - __gui_ret; \ -}) - -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) - -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) - -#define __get_user_instr_inatomic(x, ptr) \ - ___get_user_instr(__get_user_inatomic, x, ptr) - -#else /* !CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - get_user((x).val, (u32 __user *)(ptr)) - -#define __get_user_instr(x, ptr) \ - __get_user_nocheck((x).val, (u32 __user *)(ptr), sizeof(u32), true) - -#define __get_user_instr_inatomic(x, ptr) \ - __get_user_nosleep((x).val, (u32 __user *)(ptr), sizeof(u32)) - -#endif /* CONFIG_PPC64 */ - -extern long __put_user_bad(void); - -#define __put_user_size(x, ptr, size, retval) \ -do { \ - __label__ __pu_failed; \ +#define __put_user(x, ptr) \ +({ \ + long __pu_err; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \ + __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \ \ - retval = 0; \ - allow_write_to_user(ptr, size); \ - __put_user_size_goto(x, ptr, size, __pu_failed); \ - prevent_write_to_user(ptr, size); \ - break; \ + might_fault(); \ + do { \ + __label__ __pu_failed; \ + \ + allow_write_to_user(__pu_addr, __pu_size); \ + __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed); \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = 0; \ + break; \ \ __pu_failed: \ - retval = -EFAULT; \ - prevent_write_to_user(ptr, size); \ -} while (0) - -#define __put_user_nocheck(x, ptr, size) \ -({ \ - long __pu_err; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ - \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ - __chk_user_ptr(__pu_addr); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + prevent_write_to_user(__pu_addr, __pu_size); \ + __pu_err = -EFAULT; \ + } while (0); \ \ __pu_err; \ }) -#define __put_user_check(x, ptr, size) \ +#define put_user(x, ptr) \ ({ \ - long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ + __typeof__(*(ptr)) __user *_pu_addr = (ptr); \ \ - might_fault(); \ - if (access_ok(__pu_addr, __pu_size)) \ - 
__put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - \ - __pu_err; \ + access_ok(_pu_addr, sizeof(*(ptr))) ? \ + __put_user(x, _pu_addr) : -EFAULT; \ }) -#define __put_user_nosleep(x, ptr, size) \ -({ \ - long __pu_err; \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __typeof__(*(ptr)) __pu_val = (x); \ - __typeof__(size) __pu_size = (size); \ - \ - __chk_user_ptr(__pu_addr); \ - __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ - \ - __pu_err; \ -}) - - /* * We don't tell gcc that we are accessing memory, but this is OK * because we do not write to any memory gcc knows about, so there @@ -198,25 +108,17 @@ __pu_failed: \ #define __put_user_size_goto(x, ptr, size, label) \ do { \ + __typeof__(*(ptr)) __user *__pus_addr = (ptr); \ + \ switch (size) { \ - case 1: __put_user_asm_goto(x, ptr, label, "stb"); break; \ - case 2: __put_user_asm_goto(x, ptr, label, "sth"); break; \ - case 4: __put_user_asm_goto(x, ptr, label, "stw"); break; \ - case 8: __put_user_asm2_goto(x, ptr, label); break; \ - default: __put_user_bad(); \ + case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break; \ + case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break; \ + case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break; \ + case 8: __put_user_asm2_goto(x, __pus_addr, label); break; \ + default: BUILD_BUG(); \ } \ } while (0) -#define __unsafe_put_user_goto(x, ptr, size, label) \ -do { \ - __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size_goto((x), __pu_addr, (size), label); \ -} while (0) - - -extern long __get_user_bad(void); - /* * This does an atomic 128 byte aligned load from userspace. * Upto caller to do enable_kernel_vmx() before calling! @@ -234,6 +136,59 @@ extern long __get_user_bad(void); : "=r" (err) \ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err)) +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define __get_user_asm_goto(x, addr, label, op) \ + asm_volatile_goto( \ + "1: "op"%U1%X1 %0, %1 # get_user\n" \ + EX_TABLE(1b, %l2) \ + : "=r" (x) \ + : "m"UPD_CONSTR (*addr) \ + : \ + : label) + +#ifdef __powerpc64__ +#define __get_user_asm2_goto(x, addr, label) \ + __get_user_asm_goto(x, addr, label, "ld") +#else /* __powerpc64__ */ +#define __get_user_asm2_goto(x, addr, label) \ + asm_volatile_goto( \ + "1: lwz%X1 %0, %1\n" \ + "2: lwz%X1 %L0, %L1\n" \ + EX_TABLE(1b, %l2) \ + EX_TABLE(2b, %l2) \ + : "=r" (x) \ + : "m" (*addr) \ + : \ + : label) +#endif /* __powerpc64__ */ + +#define __get_user_size_goto(x, ptr, size, label) \ +do { \ + BUILD_BUG_ON(size > sizeof(x)); \ + switch (size) { \ + case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break; \ + case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \ + case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \ + case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label); break; \ + default: x = 0; BUILD_BUG(); \ + } \ +} while (0) + +#define __get_user_size_allowed(x, ptr, size, retval) \ +do { \ + __label__ __gus_failed; \ + \ + __get_user_size_goto(x, ptr, size, __gus_failed); \ + retval = 0; \ + break; \ +__gus_failed: \ + x = 0; \ + retval = -EFAULT; \ +} while (0) + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + #define __get_user_asm(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op"%U2%X2 %1, %2 # get_user\n" \ @@ -271,25 +226,27 @@ extern long __get_user_bad(void); #define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - __chk_user_ptr(ptr); \ - if (size > sizeof(x)) \ - (x) = 
__get_user_bad(); \ + BUILD_BUG_ON(size > sizeof(x)); \ switch (size) { \ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: x = 0; BUILD_BUG(); \ } \ } while (0) -#define __get_user_size(x, ptr, size, retval) \ +#define __get_user_size_goto(x, ptr, size, label) \ do { \ - allow_read_from_user(ptr, size); \ - __get_user_size_allowed(x, ptr, size, retval); \ - prevent_read_from_user(ptr, size); \ + long __gus_retval; \ + \ + __get_user_size_allowed(x, ptr, size, __gus_retval); \ + if (__gus_retval) \ + goto label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + /* * This is a type: either unsigned long, if the argument fits into * that type, or otherwise unsigned long long. @@ -297,86 +254,36 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size, do_allow) \ +#define __get_user(x, ptr) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ + __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \ \ - __chk_user_ptr(__gu_addr); \ - if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ - might_fault(); \ - if (do_allow) \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - else \ - __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + might_fault(); \ + allow_read_from_user(__gu_addr, __gu_size); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ + prevent_read_from_user(__gu_addr, __gu_size); \ (x) = (__typeof__(*(ptr)))__gu_val; \ \ __gu_err; \ }) -#define __get_user_check(x, ptr, size) \ +#define get_user(x, ptr) \ ({ \ - long __gu_err = -EFAULT; \ - __long_type(*(ptr)) __gu_val = 0; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ + __typeof__(*(ptr)) __user *_gu_addr = (ptr); \ \ - might_fault(); \ - if (access_ok(__gu_addr, __gu_size)) \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - \ - __gu_err; \ + access_ok(_gu_addr, sizeof(*(ptr))) ? 
\ + __get_user(x, _gu_addr) : \ + ((x) = (__force __typeof__(*(ptr)))0, -EFAULT); \ }) -#define __get_user_nosleep(x, ptr, size) \ -({ \ - long __gu_err; \ - __long_type(*(ptr)) __gu_val; \ - __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __typeof__(size) __gu_size = (size); \ - \ - __chk_user_ptr(__gu_addr); \ - __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ - \ - __gu_err; \ -}) - - /* more complex routines */ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifdef CONFIG_ARCH_HAS_COPY_MC -unsigned long __must_check -copy_mc_generic(void *to, const void *from, unsigned long size); - -static inline unsigned long __must_check -copy_mc_to_kernel(void *to, const void *from, unsigned long size) -{ - return copy_mc_generic(to, from, size); -} -#define copy_mc_to_kernel copy_mc_to_kernel - -static inline unsigned long __must_check -copy_mc_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(check_copy_size(from, n, true))) { - if (access_ok(to, n)) { - allow_write_to_user(to, n); - n = copy_mc_generic((void *)to, from, n); - prevent_write_to_user(to, n); - } - } - - return n; -} -#endif - #ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -414,26 +321,51 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) unsigned long __arch_clear_user(void __user *addr, unsigned long size); -static inline unsigned long clear_user(void __user *addr, unsigned long size) +static inline unsigned long __clear_user(void __user *addr, unsigned long size) { - unsigned long ret = size; + unsigned long ret; + might_fault(); - if (likely(access_ok(addr, size))) { - allow_write_to_user(addr, size); - ret = __arch_clear_user(addr, size); - prevent_write_to_user(addr, size); - } + allow_write_to_user(addr, size); + ret = __arch_clear_user(addr, size); + prevent_write_to_user(addr, size); return ret; } -static inline unsigned long __clear_user(void __user *addr, unsigned long size) +static inline unsigned long clear_user(void __user *addr, unsigned long size) { - return clear_user(addr, size); + return likely(access_ok(addr, size)) ? 
__clear_user(addr, size) : size; } extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); +#ifdef CONFIG_ARCH_HAS_COPY_MC +unsigned long __must_check +copy_mc_generic(void *to, const void *from, unsigned long size); + +static inline unsigned long __must_check +copy_mc_to_kernel(void *to, const void *from, unsigned long size) +{ + return copy_mc_generic(to, from, size); +} +#define copy_mc_to_kernel copy_mc_to_kernel + +static inline unsigned long __must_check +copy_mc_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(check_copy_size(from, n, true))) { + if (access_ok(to, n)) { + allow_write_to_user(to, n); + n = copy_mc_generic((void *)to, from, n); + prevent_write_to_user(to, n); + } + } + + return n; +} +#endif + extern long __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size); extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, @@ -482,10 +414,37 @@ user_write_access_begin(const void __user *ptr, size_t len) #define user_write_access_begin user_write_access_begin #define user_write_access_end prevent_current_write_to_user -#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) -#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_get_user(x, p, e) do { \ + __long_type(*(p)) __gu_val; \ + __typeof__(*(p)) __user *__gu_addr = (p); \ + \ + __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \ + (x) = (__typeof__(*(p)))__gu_val; \ +} while (0) + #define unsafe_put_user(x, p, e) \ - __unsafe_put_user_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) + __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e) + +#define unsafe_copy_from_user(d, s, l, e) \ +do { \ + u8 *_dst = (u8 *)(d); \ + const u8 __user *_src = (const u8 __user *)(s); \ + size_t _len = (l); \ + int _i; \ + \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e); \ + if (_len & 4) { \ + unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \ + _i += 4; \ + } \ + if (_len & 2) { \ + unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e); \ + _i += 2; \ + } \ + if (_len & 1) \ + unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e); \ +} while (0) #define unsafe_copy_to_user(d, s, l, e) \ do { \ @@ -494,9 +453,9 @@ do { \ size_t _len = (l); \ int _i; \ \ - for (_i = 0; _i < (_len & ~(sizeof(long) - 1)); _i += sizeof(long)) \ - unsafe_put_user(*(long*)(_src + _i), (long __user *)(_dst + _i), e); \ - if (IS_ENABLED(CONFIG_PPC64) && (_len & 4)) { \ + for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \ + unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \ + if (_len & 4) { \ unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \ _i += 4; \ } \ @@ -511,14 +470,8 @@ do { \ #define HAVE_GET_KERNEL_NOFAULT #define __get_kernel_nofault(dst, src, type, err_label) \ -do { \ - int __kr_err; \ - \ - __get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\ - sizeof(type), __kr_err); \ - if (unlikely(__kr_err)) \ - goto err_label; \ -} while (0) + __get_user_size_goto(*((type *)(dst)), \ + (__force type __user *)(src), sizeof(type), err_label) #define __put_kernel_nofault(dst, src, type, err_label) \ __put_user_size_goto(*((type *)(src)), \ diff --git a/arch/powerpc/include/asm/unistd.h 
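To make the shape of the uaccess.h rework above easier to see: the checking get_user()/put_user() wrappers now reduce to an access_ok() test around the unchecked __get_user()/__put_user() forms, which handle the user-access window themselves. A loose user-space analogue of that layering, with invented demo_* names and a fake buffer standing in for the user address space (this is not the kernel API):

/*
 * Hedged sketch, not kernel code: demo_get_user() does the range check and
 * delegates to __demo_get_user(), which assumes the check already happened.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static char fake_user_mem[64];          /* pretend this is user memory */

static int demo_access_ok(const void *p, size_t len)
{
        uintptr_t base = (uintptr_t)fake_user_mem;
        uintptr_t addr = (uintptr_t)p;

        return addr >= base && len <= sizeof(fake_user_mem) &&
               addr - base <= sizeof(fake_user_mem) - len;
}

/* Unchecked variant: the caller must already have validated the pointer. */
static int __demo_get_user(int *dst, const int *uptr)
{
        memcpy(dst, uptr, sizeof(*dst));
        return 0;
}

/* Checking variant: range test first, -EFAULT and a zeroed result on failure. */
static int demo_get_user(int *dst, const int *uptr)
{
        if (!demo_access_ok(uptr, sizeof(*uptr))) {
                *dst = 0;
                return -EFAULT;
        }
        return __demo_get_user(dst, uptr);
}

int main(void)
{
        int v = 42, out = -1, ret;

        memcpy(fake_user_mem, &v, sizeof(v));
        ret = demo_get_user(&out, (const int *)fake_user_mem);
        printf("in range:   ret=%d val=%d\n", ret, out);
        ret = demo_get_user(&out, (const int *)(fake_user_mem + sizeof(fake_user_mem)));
        printf("off limits: ret=%d val=%d\n", ret, out);
        return 0;
}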
b/arch/powerpc/include/asm/unistd.h index 700fcdac2e3c..b541c690a31c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -40,6 +40,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT +#define __ARCH_WANT_SYS_OLD_SELECT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_SYS_TIME diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 77c635c2c90d..1faff0be1111 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H #define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H +#include + #ifdef __ASSEMBLY__ #include @@ -154,6 +156,14 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *__arch_get_vdso_data(void); +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +{ + return (void *)vd + PAGE_SIZE; +} +#endif + static inline bool vdso_clocksource_ok(const struct vdso_data *vd) { return true; diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 3f958ecf2beb..a585c8e538ff 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -107,9 +107,7 @@ extern struct vdso_arch_data *vdso_data; bcl 20, 31, .+4 999: mflr \ptr -#if CONFIG_PPC_PAGE_SHIFT > 14 addis \ptr, \ptr, (_vdso_datapage - 999b)@ha -#endif addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 721c0d6715ac..e7479a4abf96 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -114,6 +114,7 @@ struct vio_driver { const struct vio_device_id *id_table; int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); void (*remove)(struct vio_dev *dev); + void (*shutdown)(struct vio_dev *dev); /* A driver must have a get_desired_dma() function to * be loaded in a CMO environment if it uses DMA. 
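One more aside, on the powerpc vdso/gettimeofday.h hunk just above: with time namespaces enabled, the per-namespace vdso data simply occupies the page immediately after the regular vdso data page, so the lookup is plain pointer arithmetic. A toy model of that layout, where DEMO_PAGE_SIZE and the struct are stand-ins rather than the real vdso_data:

/*
 * Toy model of "the timens data lives one page after the vdso data".
 * Purely illustrative; sizes and fields are invented.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_PAGE_SIZE 4096

struct demo_vdso_data {
        long clock_offset;              /* pretend payload */
};

static const struct demo_vdso_data *demo_timens_data(const struct demo_vdso_data *vd)
{
        /* Same arithmetic as the patch: the namespace page directly follows. */
        return (const void *)((const char *)vd + DEMO_PAGE_SIZE);
}

int main(void)
{
        char *pages;
        struct demo_vdso_data *vd, *timens;

        /* Two consecutive "pages": the vdso data page and the timens page. */
        pages = aligned_alloc(DEMO_PAGE_SIZE, 2 * DEMO_PAGE_SIZE);
        if (!pages)
                return 1;
        memset(pages, 0, 2 * DEMO_PAGE_SIZE);

        vd = (struct demo_vdso_data *)pages;
        vd->clock_offset = 0;

        timens = (struct demo_vdso_data *)(pages + DEMO_PAGE_SIZE);
        timens->clock_offset = 123;     /* per-namespace offset, say */

        printf("host offset %ld, namespace offset %ld\n",
               vd->clock_offset, demo_timens_data(vd)->clock_offset);
        free(pages);
        return 0;
}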
*/ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 9a312b975ca8..aa094a8655b0 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -102,6 +102,7 @@ void xive_flush_interrupt(void); /* xmon hook */ void xmon_xive_do_dump(int cpu); int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d); +void xmon_xive_get_irq_all(void); /* APIs used by KVM */ u32 xive_native_default_eq_shift(void); diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include #undef EDEADLOCK diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h index f698400e4bb0..9c0342312544 100644 --- a/arch/powerpc/include/uapi/asm/posix_types.h +++ b/arch/powerpc/include/uapi/asm/posix_types.h @@ -12,11 +12,6 @@ typedef unsigned long __kernel_old_dev_t; #define __kernel_old_dev_t __kernel_old_dev_t #else -typedef unsigned int __kernel_size_t; -typedef int __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -#define __kernel_size_t __kernel_size_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index c7797eb958c7..bbb4181621dd 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -107,7 +107,6 @@ static struct aligninfo spe_aligninfo[32] = { static int emulate_spe(struct pt_regs *regs, unsigned int reg, struct ppc_inst ppc_instr) { - int ret; union { u64 ll; u32 w[2]; @@ -127,11 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, nb = spe_aligninfo[instr].len; flags = spe_aligninfo[instr].flags; - /* Verify the address of the operand */ - if (unlikely(user_mode(regs) && - !access_ok(addr, nb))) - return -EFAULT; - /* userland only */ if (unlikely(!user_mode(regs))) return 0; @@ -169,26 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, } } else { temp.ll = data.ll = 0; - ret = 0; p = addr; + if (!user_read_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __get_user_inatomic(temp.v[0], p++); - ret |= __get_user_inatomic(temp.v[1], p++); - ret |= __get_user_inatomic(temp.v[2], p++); - ret |= __get_user_inatomic(temp.v[3], p++); + unsafe_get_user(temp.v[0], p++, Efault_read); + unsafe_get_user(temp.v[1], p++, Efault_read); + unsafe_get_user(temp.v[2], p++, Efault_read); + unsafe_get_user(temp.v[3], p++, Efault_read); fallthrough; case 4: - ret |= __get_user_inatomic(temp.v[4], p++); - ret |= __get_user_inatomic(temp.v[5], p++); + unsafe_get_user(temp.v[4], p++, Efault_read); + unsafe_get_user(temp.v[5], p++, Efault_read); fallthrough; case 2: - ret |= __get_user_inatomic(temp.v[6], p++); - ret |= __get_user_inatomic(temp.v[7], p++); - if (unlikely(ret)) - return -EFAULT; + unsafe_get_user(temp.v[6], p++, Efault_read); + unsafe_get_user(temp.v[7], p++, Efault_read); } + user_read_access_end(); switch (instr) { case EVLDD: @@ -255,31 +250,41 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, /* Store result to memory or update registers */ if (flags & ST) { - ret = 0; p = addr; + + if (!user_write_access_begin(addr, nb)) + return -EFAULT; + switch (nb) { case 8: - ret |= __put_user_inatomic(data.v[0], p++); - ret |= __put_user_inatomic(data.v[1], 
p++); - ret |= __put_user_inatomic(data.v[2], p++); - ret |= __put_user_inatomic(data.v[3], p++); + unsafe_put_user(data.v[0], p++, Efault_write); + unsafe_put_user(data.v[1], p++, Efault_write); + unsafe_put_user(data.v[2], p++, Efault_write); + unsafe_put_user(data.v[3], p++, Efault_write); fallthrough; case 4: - ret |= __put_user_inatomic(data.v[4], p++); - ret |= __put_user_inatomic(data.v[5], p++); + unsafe_put_user(data.v[4], p++, Efault_write); + unsafe_put_user(data.v[5], p++, Efault_write); fallthrough; case 2: - ret |= __put_user_inatomic(data.v[6], p++); - ret |= __put_user_inatomic(data.v[7], p++); + unsafe_put_user(data.v[6], p++, Efault_write); + unsafe_put_user(data.v[7], p++, Efault_write); } - if (unlikely(ret)) - return -EFAULT; + user_write_access_end(); } else { *evr = data.w[0]; regs->gpr[reg] = data.w[1]; } return 1; + +Efault_read: + user_read_access_end(); + return -EFAULT; + +Efault_write: + user_write_access_end(); + return -EFAULT; } #endif /* CONFIG_SPE */ @@ -299,13 +304,12 @@ int fix_alignment(struct pt_regs *regs) struct instruction_op op; int r, type; - /* - * We require a complete register set, if not, then our assembly - * is broken - */ - CHECK_FULL_REGS(regs); + if (is_kernel_addr(regs->nip)) + r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip); + else + r = __get_user_instr(instr, (void __user *)regs->nip); - if (unlikely(__get_user_instr(instr, (void __user *)regs->nip))) + if (unlikely(r)) return -EFAULT; if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) { /* We don't handle PPC little-endian any more... */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f3a662201a9f..28af4efb4587 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,7 +91,6 @@ int main(void) DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); #else - OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -132,7 +131,6 @@ int main(void) OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ OFFSET(PGDIR, thread_struct, pgdir); -#ifdef CONFIG_VMAP_STACK OFFSET(SRR0, thread_struct, srr0); OFFSET(SRR1, thread_struct, srr1); OFFSET(DAR, thread_struct, dar); @@ -149,7 +147,6 @@ int main(void) OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); #endif -#endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); @@ -285,21 +282,11 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); - OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime); - OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user); - OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime); - OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime); -#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -323,9 +310,6 @@ int main(void) STACK_PT_REGS_OFFSET(GPR11, gpr[11]); STACK_PT_REGS_OFFSET(GPR12, gpr[12]); 
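Stepping back to the align.c conversion a little earlier in this group of hunks: the new pattern opens a user-access window once, performs a batch of unsafe_get_user()/unsafe_put_user() accesses that branch to a fault label on error, and closes the window on both the success and the failure paths. A user-space caricature of that control flow, with invented demo_* helpers and no real KUAP involved:

/*
 * Caricature of the begin/unsafe/end pattern used in the emulate_spe()
 * rework. demo_read_begin()/demo_read_end() stand in for the access-window
 * helpers, demo_unsafe_get() for unsafe_get_user(); none of this is the
 * real kernel API.
 */
#include <errno.h>
#include <stdio.h>

static int demo_read_begin(const void *p, size_t len)
{
        (void)p; (void)len;
        printf("  [user-access window opened]\n");
        return 1;
}

static void demo_read_end(void)
{
        printf("  [user-access window closed]\n");
}

/* Jump to the fault label on error, like unsafe_get_user(). */
#define demo_unsafe_get(dst, src, label)        \
do {                                            \
        if (!(src))                             \
                goto label;                     \
        (dst) = *(src);                         \
} while (0)

static int demo_load_pair(const int *src, int out[2])
{
        if (!demo_read_begin(src, 2 * sizeof(int)))
                return -EFAULT;

        demo_unsafe_get(out[0], src, Efault);
        demo_unsafe_get(out[1], src + 1, Efault);

        demo_read_end();        /* success path closes the window... */
        return 0;

Efault:
        demo_read_end();        /* ...and so does the fault path */
        return -EFAULT;
}

int main(void)
{
        int data[2] = { 7, 9 }, out[2] = { 0, 0 }, ret;

        ret = demo_load_pair(data, out);
        printf("ok:    ret=%d vals=%d,%d\n", ret, out[0], out[1]);
        ret = demo_load_pair(NULL, out);
        printf("fault: ret=%d\n", ret);
        return 0;
}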
STACK_PT_REGS_OFFSET(GPR13, gpr[13]); -#ifndef CONFIG_PPC64 - STACK_PT_REGS_OFFSET(GPR14, gpr[14]); -#endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names @@ -381,7 +365,6 @@ int main(void) DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); - DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index cd60bc1c8701..f24cd53ff26e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -362,14 +362,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } @@ -779,7 +776,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat default: eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true); return -EINVAL; - }; + } return 0; } @@ -1568,6 +1565,7 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, } EXPORT_SYMBOL_GPL(eeh_pe_inject_err); +#ifdef CONFIG_PROC_FS static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { @@ -1594,6 +1592,7 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 78c430b7f9d9..9160285cb2f4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -48,195 +48,16 @@ */ .align 12 -#ifdef CONFIG_BOOKE - .globl mcheck_transfer_to_handler -mcheck_transfer_to_handler: - mfspr r0,SPRN_DSRR0 - stw r0,_DSRR0(r11) - mfspr r0,SPRN_DSRR1 - stw r0,_DSRR1(r11) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(mcheck_transfer_to_handler) - - .globl debug_transfer_to_handler -debug_transfer_to_handler: - mfspr r0,SPRN_CSRR0 - stw r0,_CSRR0(r11) - mfspr r0,SPRN_CSRR1 - stw r0,_CSRR1(r11) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(debug_transfer_to_handler) - - .globl crit_transfer_to_handler -crit_transfer_to_handler: -#ifdef CONFIG_PPC_BOOK3E_MMU - mfspr r0,SPRN_MAS0 - stw r0,MAS0(r11) - mfspr r0,SPRN_MAS1 - stw r0,MAS1(r11) - mfspr r0,SPRN_MAS2 - stw r0,MAS2(r11) - mfspr r0,SPRN_MAS3 - stw r0,MAS3(r11) - mfspr r0,SPRN_MAS6 - stw r0,MAS6(r11) -#ifdef CONFIG_PHYS_64BIT - mfspr r0,SPRN_MAS7 - stw r0,MAS7(r11) -#endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ -#ifdef CONFIG_44x - mfspr r0,SPRN_MMUCR - stw r0,MMUCR(r11) -#endif - mfspr r0,SPRN_SRR0 - stw r0,_SRR0(r11) - mfspr r0,SPRN_SRR1 - stw r0,_SRR1(r11) - - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,SAVED_KSP_LIMIT(r11) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -#ifdef CONFIG_40x - .globl crit_transfer_to_handler -crit_transfer_to_handler: - lwz r0,crit_r10@l(0) - stw r0,GPR10(r11) - lwz r0,crit_r11@l(0) - stw r0,GPR11(r11) - mfspr r0,SPRN_SRR0 - stw 
r0,crit_srr0@l(0) - mfspr r0,SPRN_SRR1 - stw r0,crit_srr1@l(0) - - /* set the stack limit to the current stack */ - mfspr r8,SPRN_SPRG_THREAD - lwz r0,KSP_LIMIT(r8) - stw r0,saved_ksp_limit@l(0) - rlwinm r0,r1,0,0,(31 - THREAD_SHIFT) - stw r0,KSP_LIMIT(r8) - /* fall through */ -_ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler) -#endif - -/* - * This code finishes saving the registers to the exception frame - * and jumps to the appropriate handler for the exception, turning - * on address translation. - * Note that we rely on the caller having set cr0.eq iff the exception - * occurred in kernel mode (i.e. MSR:PR = 0). - */ - .globl transfer_to_handler_full -transfer_to_handler_full: - SAVE_NVGPRS(r11) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_full) - /* fall through */ - - .globl transfer_to_handler -transfer_to_handler: - stw r2,GPR2(r11) - stw r12,_NIP(r11) - stw r9,_MSR(r11) - andi. r2,r9,MSR_PR - mfctr r12 - mfspr r2,SPRN_XER - stw r12,_CTR(r11) - stw r2,_XER(r11) - mfspr r12,SPRN_SPRG_THREAD - tovirt_vmstack r12, r12 - beq 2f /* if from user, fix up THREAD.regs */ - addi r2, r12, -THREAD - addi r11,r1,STACK_FRAME_OVERHEAD - stw r11,PT_REGS(r12) -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - /* Check to see if the dbcr0 register is set up to debug. Use the - internal debug mode bit to do this. */ - lwz r12,THREAD_DBCR0(r12) - andis. r12,r12,DBCR0_IDM@h -#endif - ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) - beq+ 3f - /* From user and task is ptraced - load up global dbcr0 */ - li r12,-1 /* clear all pending debug events */ - mtspr SPRN_DBSR,r12 - lis r11,global_dbcr0@ha - tophys(r11,r11) - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - lwz r12,0(r11) - mtspr SPRN_DBCR0,r12 -#endif - - b 3f - -2: /* if from kernel, check interrupted DOZE/NAP mode and - * check for stack overflow - */ - kuap_save_and_lock r11, r12, r9, r2, r6 - addi r2, r12, -THREAD -#ifndef CONFIG_VMAP_STACK - lwz r9,KSP_LIMIT(r12) - cmplw r1,r9 /* if r1 <= ksp_limit */ - ble- stack_ovf /* then the kernel stack overflowed */ -#endif -5: #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) + .globl prepare_transfer_to_handler +prepare_transfer_to_handler: + /* if from kernel, check interrupted DOZE/NAP mode */ lwz r12,TI_LOCAL_FLAGS(r2) mtcrf 0x01,r12 bt- 31-TLF_NAPPING,4f bt- 31-TLF_SLEEPING,7f -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ - .globl transfer_to_handler_cont -transfer_to_handler_cont: -3: - mflr r9 - tovirt_novmstack r2, r2 /* set r2 to current */ - tovirt_vmstack r9, r9 - lwz r11,0(r9) /* virtual address of handler */ - lwz r9,4(r9) /* where to go when done */ -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif -#ifdef CONFIG_TRACE_IRQFLAGS - /* - * When tracing IRQ state (lockdep) we enable the MMU before we call - * the IRQ tracing functions as they might access vmalloc space or - * perform IOs for console output. - * - * To speed up the syscall path where interrupts stay on, let's check - * first if we are changing the MSR value at all. - */ - tophys_novmstack r12, r1 - lwz r12,_MSR(r12) - andi. r12,r12,MSR_EE - bne 1f + blr - /* MSR isn't changing, just transition directly */ -#endif - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlr r9 - rfi /* jump to handler, enable MMU */ -#ifdef CONFIG_40x - b . 
/* Prevent prefetch past rfi */ -#endif - -#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING stw r12,TI_LOCAL_FLAGS(r2) b power_save_ppc32_restore @@ -246,97 +67,18 @@ transfer_to_handler_cont: lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ - kuap_restore r11, r2, r3, r4, r5 lwz r2, GPR2(r11) b fast_exception_return -#endif -_ASM_NOKPROBE_SYMBOL(transfer_to_handler) -_ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) - -#ifdef CONFIG_TRACE_IRQFLAGS -1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to - * keep interrupts disabled at this point otherwise we might risk - * taking an interrupt before we tell lockdep they are enabled. - */ - lis r12,reenable_mmu@h - ori r12,r12,reenable_mmu@l - LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r0 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif - -reenable_mmu: - /* - * We save a bunch of GPRs, - * r3 can be different from GPR3(r1) at this point, r9 and r11 - * contains the old MSR and handler address respectively, - * r0, r4-r8, r12, CCR, CTR, XER etc... are left - * clobbered as they aren't useful past this point. - */ - - stwu r1,-32(r1) - stw r9,8(r1) - stw r11,12(r1) - stw r3,16(r1) - - /* If we are disabling interrupts (normal case), simply log it with - * lockdep - */ -1: bl trace_hardirqs_off - lwz r3,16(r1) - lwz r11,12(r1) - lwz r9,8(r1) - addi r1,r1,32 - mtctr r11 - mtlr r9 - bctr /* jump to handler */ -#endif /* CONFIG_TRACE_IRQFLAGS */ - -#ifndef CONFIG_VMAP_STACK -/* - * On kernel stack overflow, load up an initial stack pointer - * and call StackOverflow(regs), which should not return. - */ -stack_ovf: - /* sometimes we use a statically-allocated stack, which is OK. */ - lis r12,_end@h - ori r12,r12,_end@l - cmplw r1,r12 - ble 5b /* r1 <= &_end is OK */ - SAVE_NVGPRS(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - lis r9,StackOverflow@ha - addi r9,r9,StackOverflow@l - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) - mtspr SPRN_NRI, r0 -#endif - mtspr SPRN_SRR0,r9 - mtspr SPRN_SRR1,r10 - rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif -_ASM_NOKPROBE_SYMBOL(stack_ovf) -#endif +_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ .globl transfer_to_syscall transfer_to_syscall: SAVE_NVGPRS(r1) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_lock r11, r12 -#endif /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD mr r9,r0 - stw r10,THREAD+PT_REGS(r2) bl system_call_exception ret_from_syscall: @@ -349,10 +91,6 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r5, r7 -#endif - kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -411,27 +149,6 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * Top-level page fault handling. - * This is in assembler because if do_page_fault tells us that - * it is a bad kernel page fault, we want to save the non-volatile - * registers before calling bad_page_fault. 
- */ - .globl handle_page_fault -handle_page_fault: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpwi r3,0 - beq+ ret_from_except - SAVE_NVGPRS(r1) - lwz r0,_TRAP(r1) - clrrwi r0,r0,1 - stw r0,_TRAP(r1) - mr r4,r3 /* err arg for bad_page_fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except_full - /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state @@ -485,7 +202,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ - kuap_check r2, r0 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -529,12 +245,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) fast_exception_return: #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) andi. r10,r9,MSR_RI /* check for recoverable interrupt */ - beq 1f /* if not, we've got problems */ + beq 3f /* if not, we've got problems */ #endif 2: REST_4GPRS(3, r11) lwz r10,_CCR(r11) - REST_GPR(1, r11) + REST_2GPRS(1, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -556,257 +272,147 @@ fast_exception_return: #endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -/* check if the exception happened in a restartable section */ -1: lis r3,exc_exit_restart_end@ha - addi r3,r3,exc_exit_restart_end@l - cmplw r12,r3 - bge 3f - lis r4,exc_exit_restart@ha - addi r4,r4,exc_exit_restart@l - cmplw r12,r4 - blt 3f - lis r3,fee_restarts@ha - tophys(r3,r3) - lwz r5,fee_restarts@l(r3) - addi r5,r5,1 - stw r5,fee_restarts@l(r3) - mr r12,r4 /* restart at exc_exit_restart */ - b 2b - - .section .bss - .align 2 -fee_restarts: - .space 4 - .previous - /* aargh, a nonrecoverable interrupt, panic */ /* aargh, we don't know which trap this is */ 3: li r10,-1 stw r10,_TRAP(r11) + prepare_transfer_to_handler + bl unrecoverable_exception + trap /* should not get here */ + + .globl interrupt_return +interrupt_return: + lwz r4,_MSR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - lis r10,MSR_KERNEL@h - ori r10,r10,MSR_KERNEL@l - bl transfer_to_handler_full - .long unrecoverable_exception - .long ret_from_except -#endif + andi. r0,r4,MSR_PR + beq .Lkernel_interrupt_return + bl interrupt_exit_user_prepare + cmpwi r3,0 + bne- .Lrestore_nvgprs - .globl ret_from_except_full -ret_from_except_full: - REST_NVGPRS(r1) - /* fall through */ - - .globl ret_from_except -ret_from_except: - /* Hard-disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. */ - /* Note: We don't bother telling lockdep about it */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - - lwz r3,_MSR(r1) /* Returning to user mode? */ - andi. r0,r3,MSR_PR - beq resume_kernel - -user_exc_return: /* r10 contains MSR_KERNEL here */ - /* Check current_thread_info()->flags */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_USER_WORK_MASK - bne do_work - -restore_user: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* Check whether this process has its own DBCR0 value. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif - ACCOUNT_CPU_USER_EXIT(r2, r10, r11) -#ifdef CONFIG_PPC_BOOK3S_32 - kuep_unlock r10, r11 -#endif - - b restore - -/* N.B. the only way to get here is from the beq following ret_from_except. 
*/ -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - lwz r8,TI_FLAGS(r2) - andis. r0,r8,_TIF_EMULATE_STACK_STORE@h - beq+ 1f - - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - - lwz r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ - - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: lwzx r0,r6,r4 - stwx r0,r6,r3 - addi r6,r6,4 - bdnz 2b - - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) - stw r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r2,TI_FLAGS -0: lwarx r8,0,r5 - andc r8,r8,r11 - stwcx. r8,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPTION - /* check current_thread_info->preempt_count */ - lwz r0,TI_PREEMPT(r2) - cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore_kuap - andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore_kuap - lwz r3,_MSR(r1) - andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore_kuap /* don't schedule if so */ -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep thinks irqs are enabled, we need to call - * preempt_schedule_irq with IRQs off, so we inform lockdep - * now that we -did- turn them off already - */ - bl trace_hardirqs_off -#endif - bl preempt_schedule_irq -#ifdef CONFIG_TRACE_IRQFLAGS - /* And now, to properly rebalance the above, we tell lockdep they - * are being turned back on, which will happen when we return - */ - bl trace_hardirqs_on -#endif -#endif /* CONFIG_PREEMPTION */ -restore_kuap: - kuap_restore r1, r2, r9, r10, r0 - - /* interrupts are hard-disabled at this point */ -restore: -#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) - lis r4,icache_44x_need_flush@ha - lwz r5,icache_44x_need_flush@l(r4) - cmplwi cr0,r5,0 - beq+ 1f - li r6,0 - iccci r0,r0 - stw r6,icache_44x_need_flush@l(r4) -1: -#endif /* CONFIG_44x */ - - lwz r9,_MSR(r1) -#ifdef CONFIG_TRACE_IRQFLAGS - /* Lockdep doesn't know about the fact that IRQs are temporarily turned - * off in this assembly code while peeking at TI_FLAGS() and such. However - * we need to inform it if the exception turned interrupts off, and we - * are about to trun them back on. - */ - andi. r10,r9,MSR_EE - beq 1f - stwu r1,-32(r1) - mflr r0 - stw r0,4(r1) - bl trace_hardirqs_on - addi r1, r1, 32 - lwz r9,_MSR(r1) -1: -#endif /* CONFIG_TRACE_IRQFLAGS */ - - lwz r0,GPR0(r1) - lwz r2,GPR2(r1) - REST_4GPRS(3, r1) - REST_2GPRS(7, r1) - - lwz r10,_XER(r1) - lwz r11,_CTR(r1) - mtspr SPRN_XER,r10 - mtctr r11 - -BEGIN_FTR_SECTION - lwarx r11,0,r1 -END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) - stwcx. r0,0,r1 /* to clear the reservation */ - -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - andi. r10,r9,MSR_RI /* check if this exception occurred */ - beql nonrecoverable /* at a bad place (MSR:RI = 0) */ - - lwz r10,_CCR(r1) - lwz r11,_LINK(r1) - mtcrf 0xFF,r10 - mtlr r11 - - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) - /* - * Once we put values in SRR0 and SRR1, we are in a state - * where exceptions are not recoverable, since taking an - * exception will trash SRR0 and SRR1. Therefore we clear the - * MSR:RI bit to indicate this. 
If we do take an exception, - * we can't return to the point of the exception but we - * can restart the exception exit path at the label - * exc_exit_restart below. -- paulus - */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI) - mtmsr r10 /* clear the RI bit */ - .globl exc_exit_restart -exc_exit_restart: - lwz r12,_NIP(r1) - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r9 - REST_4GPRS(9, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: - rfi -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) -_ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) - -#else /* !(CONFIG_4xx || CONFIG_BOOKE) */ - /* - * This is a bit different on 4xx/Book-E because it doesn't have - * the RI bit in the MSR. - * The TLB miss handler checks if we have interrupted - * the exception exit path and restarts it if so - * (well maybe one day it will... :). - */ - lwz r11,_LINK(r1) - mtlr r11 - lwz r10,_CCR(r1) - mtcrf 0xff,r10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ - li r10, 0 - stw r10, 8(r1) - REST_2GPRS(9, r1) - .globl exc_exit_restart -exc_exit_restart: +.Lfast_user_interrupt_return: lwz r11,_NIP(r1) lwz r12,_MSR(r1) mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 + +BEGIN_FTR_SECTION + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + lwz r3,_CCR(r1) + lwz r4,_LINK(r1) + lwz r5,_CTR(r1) + lwz r6,_XER(r1) + li r0,0 + + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + stw r0,8(r1) + REST_4GPRS(7, r1) REST_2GPRS(11, r1) - lwz r1,GPR1(r1) - .globl exc_exit_restart_end -exc_exit_restart_end: + + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 + + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) rfi - b . /* prevent prefetch past rfi */ -_ASM_NOKPROBE_SYMBOL(exc_exit_restart) +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif + +.Lrestore_nvgprs: + REST_NVGPRS(r1) + b .Lfast_user_interrupt_return + +.Lkernel_interrupt_return: + bl interrupt_exit_kernel_prepare + +.Lfast_kernel_interrupt_return: + cmpwi cr1,r3,0 + lwz r11,_NIP(r1) + lwz r12,_MSR(r1) + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 + +BEGIN_FTR_SECTION + stwcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + lwarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + lwz r3,_LINK(r1) + lwz r4,_CTR(r1) + lwz r5,_XER(r1) + lwz r6,_CCR(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 + + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + stw r0,8(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif + +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * SPRG Scratch0 as temporary storage to hold the store + * data, as interrupts are disabled here so it won't be clobbered. 
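To unpack "emulate stack store with update" in the comment above: stwu r1,-N(r1) both lowers the stack pointer and writes the old value at the new location as a back link, and when the exit path has to finish that store by hand it performs exactly those two steps. A plain C model of the semantics on a fake stack, with the names and frame size invented:

/*
 * Plain C model of what "stwu r1,-FRAME(r1)" does: push a new frame and
 * write the old stack pointer at its base as a back link. Illustrative only.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FRAME_WORDS 8

static uintptr_t *demo_stwu(uintptr_t *sp)
{
        uintptr_t *new_sp = sp - FRAME_WORDS;   /* the "update" part: r1 -= FRAME */

        *new_sp = (uintptr_t)sp;                /* the "store" part: back link to old r1 */
        return new_sp;
}

int main(void)
{
        uintptr_t stack[64];
        uintptr_t *sp = &stack[64];             /* stack grows down, start at the top */

        sp = demo_stwu(sp);
        assert(*sp == (uintptr_t)&stack[64]);   /* frame 1 links back to the old top */
        sp = demo_stwu(sp);
        assert(*sp == (uintptr_t)&stack[64 - FRAME_WORDS]);
        printf("two frames pushed, back links intact\n");
        return 0;
}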
+ */ + mtcr r6 +#ifdef CONFIG_BOOKE + mtspr SPRN_SPRG_WSCRATCH0, r9 +#else + mtspr SPRN_SPRG_SCRATCH0, r9 +#endif + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + stw r9,0(r1) /* perform store component of stwu */ +#ifdef CONFIG_BOOKE + mfspr r9, SPRN_SPRG_RSCRATCH0 +#else + mfspr r9, SPRN_SPRG_SCRATCH0 +#endif + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif +_ASM_NOKPROBE_SYMBOL(interrupt_return) + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) /* * Returning from a critical interrupt in user mode doesn't need @@ -837,8 +443,7 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) REST_NVGPRS(r1); \ lwz r3,_MSR(r1); \ andi. r3,r3,MSR_PR; \ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \ - bne user_exc_return; \ + bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ REST_4GPRS(3, r1); \ @@ -906,11 +511,6 @@ _ASM_NOKPROBE_SYMBOL(exc_exit_restart) #ifdef CONFIG_40x .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lis r10,saved_ksp_limit@ha; - lwz r10,saved_ksp_limit@l(r10); - tovirt(r9,r9); - stw r10,KSP_LIMIT(r9) lis r9,crit_srr0@ha; lwz r9,crit_srr0@l(r9); lis r10,crit_srr1@ha; @@ -924,9 +524,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) #ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_MMU_REGS; RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) @@ -934,9 +531,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) .globl ret_from_debug_exc ret_from_debug_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_MMU_REGS; @@ -945,9 +539,6 @@ _ASM_NOKPROBE_SYMBOL(ret_from_debug_exc) .globl ret_from_mcheck_exc ret_from_mcheck_exc: - mfspr r9,SPRN_SPRG_THREAD - lwz r10,SAVED_KSP_LIMIT(r1) - stw r10,KSP_LIMIT(r9) RESTORE_xSRR(SRR0,SRR1); RESTORE_xSRR(CSRR0,CSRR1); RESTORE_xSRR(DSRR0,DSRR1); @@ -955,121 +546,8 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ - -/* - * Load the DBCR0 value for a task that is being ptraced, - * having first saved away the global DBCR0. Note that r0 - * has the dbcr0 value to set upon entry to this. - */ -load_dbcr0: - mfmsr r10 /* first disable debug exceptions */ - rlwinm r10,r10,0,~MSR_DE - mtmsr r10 - isync - mfspr r10,SPRN_DBCR0 - lis r11,global_dbcr0@ha - addi r11,r11,global_dbcr0@l -#ifdef CONFIG_SMP - lwz r9,TASK_CPU(r2) - slwi r9,r9,2 - add r11,r11,r9 -#endif - stw r10,0(r11) - mtspr SPRN_DBCR0,r0 - li r11,-1 - mtspr SPRN_DBSR,r11 /* clear all pending debug events */ - blr - - .section .bss - .align 4 - .global global_dbcr0 -global_dbcr0: - .space 4*NR_CPUS - .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ -do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED - beq do_user_signal - -do_resched: /* r10 contains MSR_KERNEL here */ -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on - mfmsr r10 -#endif - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ - bl schedule -recheck: - /* Note: And we don't tell it we are disabling them again - * neither. Those disable/enable cycles used to peek at - * TI_FLAGS aren't advertised. - */ - LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) - mtmsr r10 /* disable interrupts */ - lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED - bne- do_resched - andi. 
r0,r9,_TIF_USER_WORK_MASK - beq restore_user -do_user_signal: /* r10 contains MSR_KERNEL here */ - ori r10,r10,MSR_EE - mtmsr r10 /* hard-enable interrupts */ - /* save r13-r31 in the exception frame, if not already done */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 2f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) -2: addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r9 - bl do_notify_resume - REST_NVGPRS(r1) - b recheck - -/* - * We come here when we are at the end of handling an exception - * that occurred at a place where taking an exception will lose - * state information, such as the contents of SRR0 and SRR1. - */ -nonrecoverable: - lis r10,exc_exit_restart_end@ha - addi r10,r10,exc_exit_restart_end@l - cmplw r12,r10 - bge 3f - lis r11,exc_exit_restart@ha - addi r11,r11,exc_exit_restart@l - cmplw r12,r11 - blt 3f - lis r10,ee_restarts@ha - lwz r12,ee_restarts@l(r10) - addi r12,r12,1 - stw r12,ee_restarts@l(r10) - mr r12,r11 /* restart at exc_exit_restart */ - blr -3: /* OK, we can't recover, kill this process */ - lwz r3,_TRAP(r1) - andi. r0,r3,1 - beq 5f - SAVE_NVGPRS(r1) - rlwinm r3,r3,0,0,30 - stw r3,_TRAP(r1) -5: mfspr r2,SPRN_SPRG_THREAD - addi r2,r2,-THREAD - tovirt(r2,r2) /* set back r2 to current */ -4: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - /* shouldn't return */ - b 4b -_ASM_NOKPROBE_SYMBOL(nonrecoverable) - - .section .bss - .align 2 -ee_restarts: - .space 4 - .previous - /* * PROM code for specific machines follows. Put it * here so it's easy to add arch-specific sections later. @@ -1088,7 +566,6 @@ _GLOBAL(enter_rtas) lis r6,1f@ha /* physical return address for rtas */ addi r6,r6,1f@l tophys(r6,r6) - tophys_novmstack r7, r1 lwz r8,RTASENTRY(r4) lwz r4,RTASBASE(r4) mfmsr r9 @@ -1097,24 +574,25 @@ _GLOBAL(enter_rtas) mtmsr r0 /* disable interrupts so SRR0/1 don't get trashed */ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR) mtlr r6 - stw r7, THREAD + RTAS_SP(r2) + stw r1, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 rfi -1: tophys_novmstack r9, r1 -#ifdef CONFIG_VMAP_STACK - li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ - mtmsr r0 - isync -#endif - lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ - lwz r9,8(r9) /* original msr value */ - addi r1,r1,INT_FRAME_SIZE - li r0,0 - tophys_novmstack r7, r2 - stw r0, THREAD + RTAS_SP(r7) +1: + lis r8, 1f@h + ori r8, r8, 1f@l + LOAD_REG_IMMEDIATE(r9,MSR_KERNEL) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - rfi /* return to caller */ + rfi /* Reactivate MMU translation */ +1: + lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */ + lwz r9,8(r1) /* original msr value */ + addi r1,r1,INT_FRAME_SIZE + li r0,0 + stw r0, THREAD + RTAS_SP(r2) + mtlr r8 + mtmsr r9 + blr /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6c4d9e276c4d..03727308d8cc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -117,13 +117,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call - * trace_hardirqs_off(). - * - * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The - * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. 
+ * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. */ /* Calling convention has r9 = orig r0, r10 = regs */ @@ -288,9 +287,8 @@ END_BTB_FLUSH_SECTION std r11,-16(r10) /* "regshere" marker */ /* - * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which - * would clobber syscall parameters. Also we always enter with IRQs - * enabled and nothing pending. system_call_exception() will call + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call * trace_hardirqs_off(). */ li r11,IRQS_ALL_DISABLED @@ -417,19 +415,6 @@ _GLOBAL(ret_from_kernel_thread) li r3,0 b .Lsyscall_exit -#ifdef CONFIG_PPC_BOOK3E -/* Save non-volatile GPRs, if not already saved. */ -_GLOBAL(save_nvgprs) - ld r11,_TRAP(r1) - andi. r0,r11,1 - beqlr- - SAVE_NVGPRS(r1) - clrrdi r0,r11,1 - std r0,_TRAP(r1) - blr -_ASM_NOKPROBE_SYMBOL(save_nvgprs); -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #define FLUSH_COUNT_CACHE \ @@ -645,7 +630,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr -#ifdef CONFIG_PPC_BOOK3S /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not * touched, no exit work created, then this can be used. @@ -657,6 +641,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S bne .Lfast_user_interrupt_return_amr kuap_kernel_restore r3, r4 andi. r0,r5,MSR_RI @@ -665,6 +650,10 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) addi r3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception b . /* should not get here */ +#else + bne .Lfast_user_interrupt_return + b .Lfast_kernel_interrupt_return +#endif .balign IFETCH_ALIGN_BYTES .globl interrupt_return @@ -678,8 +667,10 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +#ifdef CONFIG_PPC_BOOK3S .Lfast_user_interrupt_return_amr: kuap_user_restore r3, r4 +#endif .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -788,7 +779,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) RFI_TO_KERNEL b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_RTAS /* diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index e8eb9992a270..7c3654b0d0f4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -63,9 +63,6 @@ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) special_reg_save: - lbz r9,PACAIRQHAPPENED(r13) - RECONCILE_IRQ_STATE(r3,r4) - /* * We only need (or have stack space) to save this stuff if * we interrupted the kernel. @@ -119,15 +116,11 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS5,r10 mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - SPECIAL_EXC_STORE(r9,IRQHAPPENED) - mfspr r10,SPRN_DEAR SPECIAL_EXC_STORE(r10,DEAR) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACAIRQSOFTMASK(r13) - SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) ld r10,_MSR(r1) @@ -139,7 +132,8 @@ ret_from_level_except: ld r3,_MSR(r1) andi. 
r3,r3,MSR_PR beq 1f - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return 1: LOAD_REG_ADDR(r11,extlb_level_exc) @@ -193,27 +187,6 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACAIRQSOFTMASK(r13) - ld r5,SOFTE(r1) - - /* Interrupts had better not already be enabled... */ - tweqi r6,IRQS_ENABLED - - andi. r6,r5,IRQS_DISABLED - bne 1f - - TRACE_ENABLE_INTS - stb r5,PACAIRQSOFTMASK(r13) -1: - /* - * Restore PACAIRQHAPPENED rather than setting it based on - * the return MSR[EE], since we could have interrupted - * __check_irq_replay() or other inconsistent transitory - * states that must remain that way. - */ - SPECIAL_EXC_LOAD(r10,IRQHAPPENED) - stb r10,PACAIRQHAPPENED(r13) - SPECIAL_EXC_LOAD(r10,DEAR) mtspr SPRN_DEAR,r10 SPECIAL_EXC_LOAD(r10,ESR) @@ -417,14 +390,15 @@ exc_##n##_common: \ std r6,_LINK(r1); \ std r7,_CTR(r1); \ std r8,_XER(r1); \ - li r3,(n)+1; /* indicate partial regs in trap */ \ + li r3,(n); /* regs.trap vector */ \ std r9,0(r1); /* store stack frame back link */ \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ - std r0,RESULT(r1); /* clear regs->result */ + std r0,RESULT(r1); /* clear regs->result */ \ + SAVE_NVGPRS(r1); #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) @@ -435,28 +409,6 @@ exc_##n##_common: \ #define EXCEPTION_COMMON_DBG(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG) -/* - * This is meant for exceptions that don't immediately hard-enable. We - * set a bit in paca->irq_happened to ensure that a subsequent call to - * arch_local_irq_restore() will properly hard-enable and avoid the - * fast-path, and then reconcile irq state. - */ -#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4) - -/* - * This is called by exceptions that don't use INTS_DISABLE (that did not - * touch irq indicators in the PACA). This will restore MSR:EE to it's - * previous value - * - * XXX In the long run, we may want to open-code it in order to separate the - * load from the wrtee, thus limiting the latency caused by the dependency - * but at this point, I'll favor code clarity until we have a near to final - * implementation - */ -#define INTS_RESTORE_HARD \ - ld r11,_MSR(r1); \ - wrtee r11; - /* XXX FIXME: Restore r14/r15 when necessary */ #define BAD_STACK_TRAMPOLINE(n) \ exc_##n##_bad_stack: \ @@ -505,12 +457,11 @@ exc_##n##_bad_stack: \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum) \ - INTS_DISABLE; \ ack(r8); \ CHECK_NAPPING(); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ - b ret_from_except_lite; + b interrupt_return /* This value is used to mark exception frames on the stack. 
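The marker mentioned in this sentence is an ASCII tag stored next to the back-chain word so that stack walkers and dump tools can tell which frames have a struct pt_regs above them. A hedged sketch of how such a check looks in C (the 64-bit value spells "regshere"; the index parameter is a stand-in for the STACK_FRAME_OVERHEAD - 16 slot written in the hunk above, not the real arithmetic):

/* Editor's illustration only: recognising an exception frame by its marker. */
#include <stdbool.h>
#include <stdint.h>

#define EXC_FRAME_MARKER 0x7265677368657265ULL	/* ASCII "regshere" (assumed) */

static bool frame_has_pt_regs(const uint64_t *frame, unsigned int marker_idx)
{
	/* marker_idx: index of the marker doubleword within the frame */
	return frame[marker_idx] == EXC_FRAME_MARKER;
}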
*/ .section ".toc","aw" @@ -561,11 +512,10 @@ __end_interrupts: CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x100) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Machine Check Interrupt */ @@ -573,7 +523,6 @@ __end_interrupts: MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_MC(0x000) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD @@ -587,7 +536,6 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR EXCEPTION_COMMON(0x300) - INTS_DISABLE b storage_fault_common /* Instruction Storage Interrupt */ @@ -597,7 +545,6 @@ __end_interrupts: li r15,0 mr r14,r10 EXCEPTION_COMMON(0x400) - INTS_DISABLE b storage_fault_common /* External Input Interrupt */ @@ -619,13 +566,12 @@ __end_interrupts: PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR EXCEPTION_COMMON(0x700) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) - bl save_nvgprs bl program_check_exception - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return /* Floating Point Unavailable Interrupt */ START_EXCEPTION(fp_unavailable); @@ -637,12 +583,10 @@ __end_interrupts: andi. r0,r12,MSR_PR; beq- 1f bl load_up_fpu - b fast_exception_return -1: INTS_DISABLE - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD + b fast_interrupt_return +1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_fp_unavailable_exception - b ret_from_except + b interrupt_return /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); @@ -656,15 +600,13 @@ BEGIN_FTR_SECTION andi. r0,r12,MSR_PR; beq- 1f bl load_up_altivec - b fast_exception_return + b fast_interrupt_return 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b ret_from_except + b interrupt_return /* AltiVec Assist */ START_EXCEPTION(altivec_assist); @@ -672,17 +614,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + REST_NVGPRS(r1) #else bl unknown_exception #endif - b ret_from_except + b interrupt_return /* Decrementer Interrupt */ @@ -698,14 +639,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x9f0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else - bl unknown_exception + bl unknown_nmi_exception #endif b ret_from_crit_except @@ -722,11 +662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - INTS_DISABLE - bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b ret_from_except + b interrupt_return /* Debug exception as a critical interrupt*/ START_EXCEPTION(debug_crit); @@ -792,9 +730,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) - bl save_nvgprs bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return kernel_dbg_exc: b . 
/* NYI */ @@ -859,24 +797,22 @@ kernel_dbg_exc: */ mfspr r14,SPRN_DBSR EXCEPTION_COMMON_DBG(0xd08) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) - bl save_nvgprs bl DebugException - b ret_from_except + REST_NVGPRS(r1) + b interrupt_return START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) - INTS_DISABLE CHECK_NAPPING() addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b ret_from_except_lite + b interrupt_return /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, @@ -887,11 +823,10 @@ kernel_dbg_exc: CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2a0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* @@ -903,21 +838,18 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, PROLOG_ADDITION_NONE) EXCEPTION_COMMON_CRIT(0x2e0) - bl save_nvgprs bl special_reg_save CHECK_NAPPING(); addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + bl unknown_nmi_exception b ret_from_crit_except /* Hypervisor call */ @@ -926,10 +858,8 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* Embedded Hypervisor priviledged */ START_EXCEPTION(ehpriv); @@ -937,10 +867,8 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* LRAT Error interrupt */ START_EXCEPTION(lrat_error); @@ -948,10 +876,8 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl save_nvgprs - INTS_RESTORE_HARD bl unknown_exception - b ret_from_except + b interrupt_return /* * An interrupt came in while soft-disabled; We mark paca->irq_happened @@ -1011,14 +937,7 @@ storage_fault_common: ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault - cmpdi r3,0 - bne- 1f - b ret_from_except_lite -1: bl save_nvgprs - mr r4,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - bl __bad_page_fault - b ret_from_except + b interrupt_return /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it @@ -1030,291 +949,9 @@ alignment_more: addi r3,r1,STACK_FRAME_OVERHEAD ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) - bl save_nvgprs - INTS_RESTORE_HARD bl alignment_exception - b ret_from_except - - .align 7 -_GLOBAL(ret_from_except) - ld r11,_TRAP(r1) - andi. r0,r11,1 - bne ret_from_except_lite REST_NVGPRS(r1) - -_GLOBAL(ret_from_except_lite) - /* - * Disable interrupts so that current_thread_info()->flags - * can't change between when we test it and when we return - * from the interrupt. - */ - wrteei 0 - - ld r9, PACA_THREAD_INFO(r13) - ld r3,_MSR(r1) - ld r10,PACACURRENT(r13) - ld r4,TI_FLAGS(r9) - andi. r3,r3,MSR_PR - beq resume_kernel - lwz r3,(THREAD+THREAD_DBCR0)(r10) - - /* Check current_thread_info()->flags */ - andi. 
r0,r4,_TIF_USER_WORK_MASK - bne 1f - /* - * Check to see if the dbcr0 register is set up to debug. - * Use the internal debug mode bit to do this. - */ - andis. r0,r3,DBCR0_IDM@h - beq restore - mfmsr r0 - rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */ - mtmsr r0 - mtspr SPRN_DBCR0,r3 - li r10, -1 - mtspr SPRN_DBSR,r10 - b restore -1: andi. r0,r4,_TIF_NEED_RESCHED - beq 2f - bl restore_interrupts - SCHEDULE_USER - b ret_from_except_lite -2: - bl save_nvgprs - /* - * Use a non volatile GPR to save and restore our thread_info flags - * across the call to restore_interrupts. - */ - mr r30,r4 - bl restore_interrupts - mr r4,r30 - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_notify_resume - b ret_from_except - -resume_kernel: - /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ - andis. r8,r4,_TIF_EMULATE_STACK_STORE@h - beq+ 1f - - addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - - ld r3,GPR1(r1) - subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ - mr r4,r1 /* src: current exception frame */ - mr r1,r3 /* Reroute the trampoline frame to r1 */ - - /* Copy from the original to the trampoline. */ - li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ - li r6,0 /* start offset: 0 */ - mtctr r5 -2: ldx r0,r6,r4 - stdx r0,r6,r3 - addi r6,r6,8 - bdnz 2b - - /* Do real store operation to complete stdu */ - ld r5,GPR1(r1) - std r8,0(r5) - - /* Clear _TIF_EMULATE_STACK_STORE flag */ - lis r11,_TIF_EMULATE_STACK_STORE@h - addi r5,r9,TI_FLAGS -0: ldarx r4,0,r5 - andc r4,r4,r11 - stdcx. r4,0,r5 - bne- 0b -1: - -#ifdef CONFIG_PREEMPT - /* Check if we need to preempt */ - andi. r0,r4,_TIF_NEED_RESCHED - beq+ restore - /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) - cmpwi cr0,r8,0 - bne restore - ld r0,SOFTE(r1) - andi. r0,r0,IRQS_DISABLED - bne restore - - /* - * Here we are preempting the current task. We want to make - * sure we are soft-disabled first and reconcile irq state. - */ - RECONCILE_IRQ_STATE(r3,r4) - bl preempt_schedule_irq - - /* - * arch_local_irq_restore() from preempt_schedule_irq above may - * enable hard interrupt but we really should disable interrupts - * when we return from the interrupt, and so that we don't get - * interrupted after loading SRR0/1. - */ - wrteei 0 -#endif /* CONFIG_PREEMPT */ - -restore: - /* - * This is the main kernel exit path. First we check if we - * are about to re-enable interrupts - */ - ld r5,SOFTE(r1) - lbz r6,PACAIRQSOFTMASK(r13) - andi. r5,r5,IRQS_DISABLED - bne .Lrestore_irq_off - - /* We are enabling, were we already enabled ? Yes, just return */ - andi. r6,r6,IRQS_DISABLED - beq cr0,fast_exception_return - - /* - * We are about to soft-enable interrupts (we are hard disabled - * at this point). We check if there's anything that needs to - * be replayed first. - */ - lbz r0,PACAIRQHAPPENED(r13) - cmpwi cr0,r0,0 - bne- .Lrestore_check_irq_replay - - /* - * Get here when nothing happened while soft-disabled, just - * soft-enable and move-on. We will hard-enable as a side - * effect of rfi - */ -.Lrestore_no_replay: - TRACE_ENABLE_INTS - li r0,IRQS_ENABLED - stb r0,PACAIRQSOFTMASK(r13); - -/* This is the return from load_up_fpu fast path which could do with - * less GPR restores in fact, but for now we have a single return path - */ -fast_exception_return: - wrteei 0 -1: mr r0,r13 - ld r10,_MSR(r1) - REST_4GPRS(2, r1) - andi. r6,r10,MSR_PR - REST_2GPRS(6, r1) - beq 1f - ACCOUNT_CPU_USER_EXIT(r13, r10, r11) - ld r0,GPR13(r1) - -1: stdcx. 
r0,0,r1 /* to clear the reservation */ - - ld r8,_CCR(r1) - ld r9,_LINK(r1) - ld r10,_CTR(r1) - ld r11,_XER(r1) - mtcr r8 - mtlr r9 - mtctr r10 - mtxer r11 - REST_2GPRS(8, r1) - ld r10,GPR10(r1) - ld r11,GPR11(r1) - ld r12,GPR12(r1) - mtspr SPRN_SPRG_GEN_SCRATCH,r0 - - std r10,PACA_EXGEN+EX_R10(r13); - std r11,PACA_EXGEN+EX_R11(r13); - ld r10,_NIP(r1) - ld r11,_MSR(r1) - ld r0,GPR0(r1) - ld r1,GPR1(r1) - mtspr SPRN_SRR0,r10 - mtspr SPRN_SRR1,r11 - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - mfspr r13,SPRN_SPRG_GEN_SCRATCH - rfi - - /* - * We are returning to a context with interrupts soft disabled. - * - * However, we may also about to hard enable, so we need to - * make sure that in this case, we also clear PACA_IRQ_HARD_DIS - * or that bit can get out of sync and bad things will happen - */ -.Lrestore_irq_off: - ld r3,_MSR(r1) - lbz r7,PACAIRQHAPPENED(r13) - andi. r0,r3,MSR_EE - beq 1f - rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS - stb r7,PACAIRQHAPPENED(r13) -1: -#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) - /* The interrupt should not have soft enabled. */ - lbz r7,PACAIRQSOFTMASK(r13) -1: tdeqi r7,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING -#endif - b fast_exception_return - - /* - * Something did happen, check if a re-emit is needed - * (this also clears paca->irq_happened) - */ -.Lrestore_check_irq_replay: - /* XXX: We could implement a fast path here where we check - * for irq_happened being just 0x01, in which case we can - * clear it and return. That means that we would potentially - * miss a decrementer having wrapped all the way around. - * - * Still, this might be useful for things like hash_page - */ - bl __check_irq_replay - cmpwi cr0,r3,0 - beq .Lrestore_no_replay - - /* - * We need to re-emit an interrupt. We do so by re-using our - * existing exception frame. We first change the trap value, - * but we need to ensure we preserve the low nibble of it - */ - ld r4,_TRAP(r1) - clrldi r4,r4,60 - or r4,r4,r3 - std r4,_TRAP(r1) - - /* - * PACA_IRQ_HARD_DIS won't always be set here, so set it now - * to reconcile the IRQ state. Tracing is already accounted for. - */ - lbz r4,PACAIRQHAPPENED(r13) - ori r4,r4,PACA_IRQ_HARD_DIS - stb r4,PACAIRQHAPPENED(r13) - - /* - * Then find the right handler and call it. Interrupts are - * still soft-disabled and we keep them that way. - */ - cmpwi cr0,r3,0x500 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl do_IRQ - b ret_from_except -1: cmpwi cr0,r3,0x900 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl timer_interrupt - b ret_from_except -#ifdef CONFIG_PPC_DOORBELL -1: - cmpwi cr0,r3,0x280 - bne 1f - addi r3,r1,STACK_FRAME_OVERHEAD; - bl doorbell_exception -#endif /* CONFIG_PPC_DOORBELL */ -1: b ret_from_except /* What else to do here ? 
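For readers following the deletion above: the removed assembly asked __check_irq_replay() which interrupt had been latched while soft-disabled and then re-ran the matching handler. That dispatch is easier to read as C; the sketch below is an editor's illustration with assumed prototypes, not the replacement code (the equivalent replay now lives in C in the common interrupt-exit path).

/* Editor's illustration of the replay dispatch the removed lines performed. */
struct pt_regs;

/* Handlers named in the removed code; prototypes assumed for the sketch. */
void do_IRQ(struct pt_regs *regs);
void timer_interrupt(struct pt_regs *regs);
void doorbell_exception(struct pt_regs *regs);

static void replay_one_interrupt(struct pt_regs *regs, unsigned int vector)
{
	switch (vector) {
	case 0x500:		/* external interrupt */
		do_IRQ(regs);
		break;
	case 0x900:		/* decrementer */
		timer_interrupt(regs);
		break;
	case 0x280:		/* doorbell, when CONFIG_PPC_DOORBELL is set */
		doorbell_exception(regs);
		break;
	default:		/* nothing else to replay */
		break;
	}
}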
*/ - -_ASM_NOKPROBE_SYMBOL(ret_from_except); -_ASM_NOKPROBE_SYMBOL(ret_from_except_lite); -_ASM_NOKPROBE_SYMBOL(resume_kernel); -_ASM_NOKPROBE_SYMBOL(restore); -_ASM_NOKPROBE_SYMBOL(fast_exception_return); + b interrupt_return /* * Trampolines used when spotting a bad kernel stack pointer in diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8082b690e874..fa8e52a0239e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -692,25 +692,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r1,GPR1(r1) .endm -/* - * When the idle code in power4_idle puts the CPU into NAP mode, - * it has to do so in a loop, and relies on the external interrupt - * and decrementer interrupt entry code to get it out of the loop. - * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags - * to signal that it is in the loop and needs help to get out. - */ -#ifdef CONFIG_PPC_970_NAP -#define FINISH_NAP \ -BEGIN_FTR_SECTION \ - ld r11, PACA_THREAD_INFO(r13); \ - ld r9,TI_LOCAL_FLAGS(r11); \ - andi. r10,r9,_TLF_NAPPING; \ - bnel power4_fixup_nap; \ -END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -#else -#define FINISH_NAP -#endif - /* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. @@ -1248,7 +1229,6 @@ EXC_COMMON_BEGIN(machine_check_common) */ GEN_COMMON machine_check - FINISH_NAP /* Enable MSR_RI when finished with PACA_EXMC */ li r10,MSR_RI mtmsrd r10,1 @@ -1571,7 +1551,6 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -1801,7 +1780,6 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt b interrupt_return @@ -1886,7 +1864,6 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2237,7 +2214,6 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception b interrupt_return @@ -2266,7 +2242,6 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_DOORBELL bl doorbell_exception @@ -2299,7 +2274,6 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ b interrupt_return @@ -2345,7 +2319,6 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - FINISH_NAP addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception b interrupt_return @@ -2530,8 +2503,6 @@ EXC_VIRT_NONE(0x5100, 0x100) INT_DEFINE_BEGIN(cbe_system_error) IVEC=0x1200 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_system_error) EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) @@ -2551,11 +2522,16 @@ 
EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) #endif - +/** + * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. + * This has been removed from the ISA before 2.01, which is the earliest + * 64-bit BookS ISA supported, however the G5 / 970 implements this + * interrupt with a non-architected feature available through the support + * processor interface. + */ INT_DEFINE_BEGIN(instruction_breakpoint) IVEC=0x1300 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - IKVM_SKIP=1 IKVM_REAL=1 #endif INT_DEFINE_END(instruction_breakpoint) @@ -2701,8 +2677,6 @@ EXC_COMMON_BEGIN(denorm_exception_common) INT_DEFINE_BEGIN(cbe_maintenance) IVEC=0x1600 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_maintenance) EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) @@ -2754,8 +2728,6 @@ EXC_COMMON_BEGIN(altivec_assist_common) INT_DEFINE_BEGIN(cbe_thermal) IVEC=0x1800 IHSRR=1 - IKVM_SKIP=1 - IKVM_REAL=1 INT_DEFINE_END(cbe_thermal) EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) @@ -3096,24 +3068,6 @@ USE_FIXED_SECTION(virt_trampolines) __end_interrupts: DEFINE_FIXED_SYMBOL(__end_interrupts) -#ifdef CONFIG_PPC_970_NAP - /* - * Called by exception entry code if _TLF_NAPPING was set, this clears - * the NAPPING flag, and redirects the exception exit to - * power4_fixup_nap_return. - */ - .globl power4_fixup_nap -EXC_COMMON_BEGIN(power4_fixup_nap) - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - LOAD_REG_ADDR(r10, power4_idle_nap_return) - std r10,_NIP(r1) - blr - -power4_idle_nap_return: - blr -#endif - CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); CLOSE_FIXED_SECTION(virt_vectors); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8482739d42f3..b990075285f5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The CPU who acquired the lock to trigger the fadump crash should @@ -44,22 +45,21 @@ static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); -struct kobject *fadump_kobj; - #ifndef CONFIG_PRESERVE_FA_DUMP +static struct kobject *fadump_kobj; + static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; +static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; -struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, - RESERVED_RNGS_SZ, 0, - RESERVED_RNGS_CNT, true }; +static struct fadump_mrange_info +reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true }; static void __init early_init_dt_scan_reserved_ranges(unsigned long node); @@ -79,7 +79,7 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -int __init fadump_cma_init(void) +static int __init fadump_cma_init(void) { unsigned long long base, size; int rc; @@ -292,7 +292,7 @@ static void fadump_show_config(void) * that is required for a kernel to boot successfully. 
* */ -static inline u64 fadump_calculate_reserve_size(void) +static __init u64 fadump_calculate_reserve_size(void) { u64 base, size, bootmem_min; int ret; @@ -728,7 +728,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * If we came in via system reset, wait a while for the secondary * CPUs to enter. */ - if (TRAP(&(fdh->regs)) == 0x100) { + if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) { msecs = CRASH_TIMEOUT; while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) mdelay(1); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 3ff9a8fafa46..2c57ece6671c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -92,9 +92,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 5d4706c14572..a8221ddcbd66 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -10,36 +10,39 @@ * We assume sprg3 has the physical address of the current * task's thread_struct. */ -.macro EXCEPTION_PROLOG handle_dar_dsisr=0 +.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0 EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=\handle_dar_dsisr + EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr .endm .macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 -#ifdef CONFIG_VMAP_STACK mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr +#ifdef CONFIG_40x + mfspr r11, SPRN_DEAR +#else mfspr r11, SPRN_DAR +#endif stw r11, DAR(r10) +#ifdef CONFIG_40x + mfspr r11, SPRN_ESR +#else mfspr r11, SPRN_DSISR +#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 stw r11, SRR0(r10) -#endif mfspr r11, SPRN_SRR1 /* check whether user or kernel */ -#ifdef CONFIG_VMAP_STACK stw r11, SRR1(r10) -#endif mfcr r10 andi. 
r11, r11, MSR_PR .endm -.macro EXCEPTION_PROLOG_1 for_rtas=0 -#ifdef CONFIG_VMAP_STACK +.macro EXCEPTION_PROLOG_1 mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f @@ -47,32 +50,33 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: +#ifdef CONFIG_VMAP_STACK mtcrf 0x3f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow -#else - subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ - beq 1f - mfspr r11,SPRN_SPRG_THREAD - lwz r11,TASK_STACK-THREAD(r11) - addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -1: tophys(r11, r11) + bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow #endif .endm -.macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 -#ifdef CONFIG_VMAP_STACK - li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r11 - isync +.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0 +#ifdef CONFIG_PPC_8xx + .if \handle_dar_dsisr + li r11, RPN_PATTERN + mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ + .endif +#endif + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 mfspr r11, SPRN_SPRG_SCRATCH2 + rfi + + .text +\name\()_virt: +1: stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 -#else - stw r1,GPR1(r11) - stw r1,0(r11) - tovirt(r1, r11) /* set new kernel sp */ -#endif stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) @@ -82,7 +86,6 @@ stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) -#ifdef CONFIG_VMAP_STACK mfspr r12, SPRN_SPRG_THREAD tovirt(r12, r12) .if \handle_dar_dsisr @@ -93,26 +96,48 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#else - mfspr r12,SPRN_SRR0 - mfspr r9,SPRN_SRR1 -#endif #ifdef CONFIG_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#elif defined(CONFIG_PPC_8xx) + mtspr SPRN_EID, r2 /* Set MSR_RI */ #else -#ifdef CONFIG_VMAP_STACK - li r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */ -#else - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ -#endif + li r10, MSR_KERNEL /* can take exceptions */ mtmsr r10 /* (except for mach check in rtas) */ #endif - stw r0,GPR0(r11) + COMMON_EXCEPTION_PROLOG_END \trapno +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) + stw r10,8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD +.endm + +.macro prepare_transfer_to_handler +#ifdef CONFIG_PPC_BOOK3S_32 + andi. 
r12,r9,MSR_PR + bne 777f + bl prepare_transfer_to_handler +777: +#endif .endm .macro SYSCALL_ENTRY trapno @@ -156,54 +181,6 @@ b transfer_to_syscall /* jump to handler */ .endm -.macro save_dar_dsisr_on_stack reg1, reg2, sp -#ifndef CONFIG_VMAP_STACK - mfspr \reg1, SPRN_DAR - mfspr \reg2, SPRN_DSISR - stw \reg1, _DAR(\sp) - stw \reg2, _DSISR(\sp) -#endif -.endm - -.macro get_and_save_dar_dsisr_on_stack reg1, reg2, sp -#ifdef CONFIG_VMAP_STACK - lwz \reg1, _DAR(\sp) - lwz \reg2, _DSISR(\sp) -#else - save_dar_dsisr_on_stack \reg1, \reg2, \sp -#endif -.endm - -.macro tovirt_vmstack dst, src -#ifdef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tovirt_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tovirt(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - -.macro tophys_novmstack dst, src -#ifndef CONFIG_VMAP_STACK - tophys(\dst, \src) -#else - .ifnc \dst, \src - mr \dst, \src - .endif -#endif -.endm - /* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). @@ -217,41 +194,29 @@ */ #ifdef CONFIG_PPC_BOOK3S #define START_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ DO_KVM n; \ label: #else #define START_EXCEPTION(n, label) \ + __HEAD; \ . = n; \ label: #endif -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label) \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - LOAD_REG_IMMEDIATE(r10, msr); \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + EXCEPTION_PROLOG n label; \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return .macro vmap_stack_overflow_exception -#ifdef CONFIG_VMAP_STACK + __HEAD +vmap_stack_overflow: #ifdef CONFIG_SMP mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) @@ -261,16 +226,11 @@ lis r1, emergency_ctx@ha #endif lwz r1, emergency_ctx@l(r1) - cmpwi cr1, r1, 0 - bne cr1, 1f - lis r1, init_thread_union@ha - addi r1, r1, init_thread_union@l -1: addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE - EXCEPTION_PROLOG_2 - SAVE_NVGPRS(r11) - addi r3, r1, STACK_FRAME_OVERHEAD - EXC_XFER_STD(0, stack_overflow_exception) -#endif + addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE + EXCEPTION_PROLOG_2 0 vmap_stack_overflow + prepare_transfer_to_handler + bl stack_overflow_exception + b interrupt_return .endm #endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 24724a7dad49..e1360b88b6cb 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -89,7 +89,11 @@ _ENTRY(crit_srr0) .space 4 _ENTRY(crit_srr1) .space 4 -_ENTRY(saved_ksp_limit) +_ENTRY(crit_r1) + .space 4 +_ENTRY(crit_dear) + .space 4 +_ENTRY(crit_esr) .space 4 /* @@ -100,42 +104,62 @@ _ENTRY(saved_ksp_limit) * Instead we use a couple of words of memory at low physical addresses. * This is OK since we don't support SMP on these processors. */ -#define CRITICAL_EXCEPTION_PROLOG \ - stw r10,crit_r10@l(0); /* save two registers to work with */\ - stw r11,crit_r11@l(0); \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ - andi. 
r11,r11,MSR_PR; \ - lis r11,critirq_ctx@ha; \ - tophys(r11,r11); \ - lwz r11,critirq_ctx@l(r11); \ - beq 1f; \ - /* COMING FROM USER MODE */ \ - mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\ -1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\ - tophys(r11,r11); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ - stw r12,_DEAR(r11); /* since they may have had stuff */\ - mfspr r9,SPRN_ESR; /* in them at the point where the */\ - stw r9,_ESR(r11); /* exception was taken */\ - mfspr r12,SPRN_SRR2; \ - stw r1,GPR1(r11); \ - mfspr r9,SPRN_SRR3; \ - stw r1,0(r11); \ - tovirt(r1,r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) +.macro CRITICAL_EXCEPTION_PROLOG trapno name + stw r10,crit_r10@l(0) /* save two registers to work with */ + stw r11,crit_r11@l(0) + mfspr r10,SPRN_SRR0 + mfspr r11,SPRN_SRR1 + stw r10,crit_srr0@l(0) + stw r11,crit_srr1@l(0) + mfspr r10,SPRN_DEAR + mfspr r11,SPRN_ESR + stw r10,crit_dear@l(0) + stw r11,crit_esr@l(0) + mfcr r10 /* save CR in r10 for now */ + mfspr r11,SPRN_SRR3 /* check whether user or kernel */ + andi. r11,r11,MSR_PR + lis r11,(critirq_ctx-PAGE_OFFSET)@ha + lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) + beq 1f + /* COMING FROM USER MODE */ + mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ + lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ +1: stw r1,crit_r1@l(0) + addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ + mtspr SPRN_SRR1, r11 + lis r11, 1f@h + ori r11, r11, 1f@l + mtspr SPRN_SRR0, r11 + rfi + + .text +1: +\name\()_virt: + lwz r11,crit_r1@l(0) + stw r11,GPR1(r1) + stw r11,0(r1) + mr r11,r1 + stw r10,_CCR(r11) /* save various registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mflr r10 + stw r10,_LINK(r11) + lis r9,PAGE_OFFSET@ha + lwz r10,crit_r10@l(r9) + lwz r12,crit_r11@l(r9) + stw r10,GPR10(r11) + stw r12,GPR11(r11) + lwz r12,crit_dear@l(r9) + lwz r9,crit_esr@l(r9) + stw r12,_DEAR(r11) /* since they may have had stuff */ + stw r9,_ESR(r11) /* exception was taken */ + mfspr r12,SPRN_SRR2 + mfspr r9,SPRN_SRR3 + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ + COMMON_EXCEPTION_PROLOG_END \trapno + 2 +_ASM_NOKPROBE_SYMBOL(\name\()_virt) +.endm /* * State at this point: @@ -155,10 +179,10 @@ _ENTRY(saved_ksp_limit) */ #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG n label; \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc /* * 0x0100 - Critical Interrupt Exception @@ -178,69 +202,67 @@ _ENTRY(saved_ksp_limit) * if they can't resolve the lightweight TLB fault. 
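A few hunks up, the rewritten CRITICAL_EXCEPTION_PROLOG parks live state in a handful of fixed words in low physical memory (crit_r10, crit_srr0, crit_dear, ...) rather than on the stack, and the surrounding comment notes this is acceptable only because these processors are not SMP: a single global save area would be corrupted if two CPUs could take a critical interrupt at once. Purely as a model of that design choice (names mirror the asm labels; the struct and helper are editorial, not kernel code):

/* Editor's model of the single, fixed critical-interrupt save area. */
struct crit_save_area {
	unsigned long r10;	/* crit_r10 */
	unsigned long r11;	/* crit_r11 */
	unsigned long srr0;	/* crit_srr0 */
	unsigned long srr1;	/* crit_srr1 */
	unsigned long r1;	/* crit_r1 */
	unsigned long dear;	/* crit_dear */
	unsigned long esr;	/* crit_esr */
};

/* One global instance -- safe only because there is exactly one CPU. */
static struct crit_save_area crit_save;

static void crit_prolog_save(unsigned long srr0, unsigned long srr1,
			     unsigned long dear, unsigned long esr)
{
	crit_save.srr0 = srr0;
	crit_save.srr1 = srr1;
	crit_save.dear = dear;
	crit_save.esr  = esr;
}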
*/ START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it */ - stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ - stw r4, _DEAR(r11) - EXC_XFER_LITE(0x300, handle_page_fault) + EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* * 0x0400 - Instruction Storage Exception * This is caused by a fetch from non-execute or guarded pages. */ START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0x400 InstructionAccess li r5,0 stw r5, _ESR(r11) /* Zero ESR */ stw r12, _DEAR(r11) /* SRR0 as DEAR */ - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG - mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ - stw r4,_DEAR(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x600, alignment_exception) + EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG - mfspr r4,SPRN_ESR /* Grab the ESR and save it */ - stw r4,_ESR(r11) - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x700, program_check_exception) + EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, Trap_08, unknown_exception) + EXCEPTION(0x0900, Trap_09, unknown_exception) + EXCEPTION(0x0A00, Trap_0A, unknown_exception) + EXCEPTION(0x0B00, Trap_0B, unknown_exception) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) SYSCALL_ENTRY 0xc00 /* Trap_0D is commented out to get more space for system call exception */ -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) +/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ + EXCEPTION(0x0E00, Trap_0E, unknown_exception) + EXCEPTION(0x0F00, Trap_0F, unknown_exception) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ - . = 0x1000 + START_EXCEPTION(0x1000, DecrementerTrap) b Decrementer -/* 0x1010 - Fixed Interval Timer (FIT) Exception -*/ - . = 0x1010 +/* 0x1010 - Fixed Interval Timer (FIT) Exception */ + START_EXCEPTION(0x1010, FITExceptionTrap) b FITException -/* 0x1020 - Watchdog Timer (WDT) Exception -*/ - . = 0x1020 +/* 0x1020 - Watchdog Timer (WDT) Exception */ + START_EXCEPTION(0x1020, WDTExceptionTrap) b WDTException /* 0x1100 - Data TLB Miss Exception @@ -249,13 +271,13 @@ _ENTRY(saved_ksp_limit) * load TLB entries from the page table if they exist. 
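The comment above summarises the fast path: on a TLB miss the handler walks the page table itself and, if a valid entry exists, programs the TLB and returns straight to the faulting instruction; only otherwise does it fall through to the heavyweight DataStorage/do_page_fault path. A simplified C rendering of that decision, with assumed helpers and a one-level lookup standing in for the real walk:

/* Editor's sketch of the software TLB-miss fast-path decision. */
#include <stdbool.h>
#include <stdint.h>

#define PTE_PRESENT	0x1u	/* assumed "valid" bit for the sketch */

uint32_t *lookup_pte(uintptr_t ea);			/* assumed helper */
void write_tlb_entry(uintptr_t ea, uint32_t pte);	/* assumed helper */

static bool dtlb_miss_fast_path(uintptr_t ea)
{
	uint32_t *pte = lookup_pte(ea);

	if (pte && (*pte & PTE_PRESENT)) {
		write_tlb_entry(ea, *pte);
		return true;	/* rfi back to the faulting access */
	}
	return false;		/* punt to DataStorage -> do_page_fault */
}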
*/ START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -316,13 +338,12 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b DataStorage /* 0x1200 - Instruction TLB Miss Exception @@ -330,13 +351,13 @@ _ENTRY(saved_ksp_limit) * registers and bailout to a different point. */ START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ + mtspr SPRN_SPRG_SCRATCH6, r11 mtspr SPRN_SPRG_SCRATCH3, r12 mtspr SPRN_SPRG_SCRATCH4, r9 mfcr r12 mfspr r9, SPRN_PID - mtspr SPRN_SPRG_SCRATCH5, r9 + rlwimi r12, r9, 0, 0xff mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -397,28 +418,27 @@ _ENTRY(saved_ksp_limit) /* The bailout. Restore registers to pre-exception conditions * and call the heavyweights to help us out. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, unknown_exception) + EXCEPTION(0x1400, Trap_14, unknown_exception) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, unknown_exception) + EXCEPTION(0x1700, Trap_17, unknown_exception) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1A00, Trap_1A, unknown_exception) + EXCEPTION(0x1B00, Trap_1B, unknown_exception) + EXCEPTION(0x1C00, Trap_1C, unknown_exception) + EXCEPTION(0x1D00, Trap_1D, unknown_exception) + EXCEPTION(0x1E00, Trap_1E, unknown_exception) + EXCEPTION(0x1F00, Trap_1F, unknown_exception) /* Check for a single step debug exception while in an exception * handler before state has been 
saved. This is to catch the case @@ -435,7 +455,7 @@ _ENTRY(saved_ksp_limit) */ /* 0x2000 - Debug Exception */ START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG + CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap /* * If this is a single step or branch-taken exception in an @@ -477,32 +497,35 @@ _ENTRY(saved_ksp_limit) /* continue normal handling for a critical exception... */ 2: mfspr r4,SPRN_DBSR stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_TEMPLATE(DebugException, 0x2002, \ - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + prepare_transfer_to_handler + bl DebugException + b ret_from_crit_exc /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ + __HEAD Decrementer: - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0x1000 Decrementer lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x1000, timer_interrupt) + prepare_transfer_to_handler + bl timer_interrupt + b interrupt_return /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ + __HEAD FITException: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_STD(0x1010, unknown_exception) + EXCEPTION_PROLOG 0x1010 FITException + prepare_transfer_to_handler + bl unknown_exception + b interrupt_return /* Watchdog Timer (WDT) Exception. (from 0x1020) */ + __HEAD WDTException: - CRITICAL_EXCEPTION_PROLOG; - addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, - (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException + prepare_transfer_to_handler + bl WatchdogException + b ret_from_crit_exc /* Other PowerPC processors, namely those derived from the 6xx-series * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. @@ -510,6 +533,7 @@ WDTException: * reserved. */ + __HEAD /* Damn, I came up one instruction too many to fit into the * exception space :-). Both the instruction and data TLB * miss get to this point to load the TLB. @@ -543,13 +567,12 @@ finish_tlb_load: /* Done...restore registers and get out of here. */ - mfspr r9, SPRN_SPRG_SCRATCH5 - mtspr SPRN_PID, r9 - mtcr r12 + mtspr SPRN_PID, r12 + mtcrf 0x80, r12 mfspr r9, SPRN_SPRG_SCRATCH4 mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH1 - mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH6 + mfspr r10, SPRN_SPRG_SCRATCH5 rfi /* Should sync shadow TLBs */ b . 
/* prevent prefetch past rfi */ diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 813fa305c33b..5c106ac36626 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -263,8 +263,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ - do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -277,7 +276,7 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) + FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -285,15 +284,14 @@ interrupt_base: /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) + AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 46dff3f9c31f..7d445e4342c0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,6 +29,13 @@ #include #include #include +#include + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN 0x00f0 #include "head_32.h" @@ -42,12 +49,6 @@ #endif .endm -/* - * Value for the bits that have fixed value in RPN entries. - * Also used for tagging DAR for DTLBerror. - */ -#define RPN_PATTERN 0x00f0 - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -118,56 +119,54 @@ instruction_counter: #endif /* System reset */ - EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception) /* Machine check */ - . = 0x200 -MachineCheck: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0x200, machine_check_exception) + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) + EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1 + prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - . 
= 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - li r6, RPN_PATTERN - mtspr SPRN_DAR, r6 /* Tag DAR, to be used in DTLB Error */ - addi r3,r1,STACK_FRAME_OVERHEAD - b .Lalignment_exception_ool + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - - /* With VMAP_STACK there's not enough room for this at 0x600 */ - . = 0xa00 -.Lalignment_exception_ool: - EXC_XFER_STD(0x600, alignment_exception) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) /* System call */ - . = 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL /* Single step - not used on 601 */ - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu) + EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu + prepare_transfer_to_handler + bl emulation_assist_interrupt + REST_NVGPRS(r1) + b interrupt_return - . = 0x1100 /* * For the MPC8xx, this is a software tablewalk to load the instruction * TLB. The task switch loads the M_TWB register with the pointer to the first @@ -189,7 +188,7 @@ SystemCall: #define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif -InstructionTLBMiss: + START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 @@ -245,8 +244,7 @@ InstructionTLBMiss: rfi #endif - . = 0x1200 -DataStoreTLBMiss: + START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -309,83 +307,74 @@ DataStoreTLBMiss: * to many reasons, such as executing guarded memory or illegal instruction * addresses. There is nothing to do but handle a big time error fault. */ - . = 0x1300 -InstructionTLBError: - EXCEPTION_PROLOG + START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError) + /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie tlbie r12 - /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: stw r12, _DAR(r11) stw r5, _DSISR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We bail out to * a higher level function that can handle it. */ - . 
= 0x1400 -DataTLBError: + START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_DAR cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_VMAP_STACK - li r11, RPN_PATTERN - mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */ -#endif EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 + /* 0x300 is DataAccess exception, needed by bad_page_fault() */ + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 + lwz r4, _DAR(r11) + lwz r5, _DSISR(r11) andis. r10,r5,DSISR_NOHPTE@h beq+ .Ldtlbie tlbie r4 .Ldtlbie: -#ifndef CONFIG_VMAP_STACK - li r10,RPN_PATTERN - mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ -#endif - /* 0x300 is DataAccess exception, needed by bad_page_fault() */ - EXC_XFER_LITE(0x300, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception +#endif /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to * using them. */ -do_databreakpoint: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 - addi r3,r1,STACK_FRAME_OVERHEAD - mfspr r4,SPRN_BAR - stw r4,_DAR(r11) -#ifndef CONFIG_VMAP_STACK - mfspr r5,SPRN_DSISR - stw r5,_DSISR(r11) -#endif - EXC_XFER_STD(0x1c00, do_break) - - . = 0x1c00 -DataBreakpoint: + START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint) EXCEPTION_PROLOG_0 handle_dar_dsisr=1 mfspr r11, SPRN_SRR0 cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq - bne cr1, do_databreakpoint + bne cr1, 1f mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r11, SPRN_SPRG_SCRATCH1 rfi +1: EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1 + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + prepare_transfer_to_handler + bl do_break + REST_NVGPRS(r1) + b interrupt_return + #ifdef CONFIG_PERF_EVENTS - . = 0x1d00 -InstructionBreakpoint: + START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint) mtspr SPRN_SPRG_SCRATCH0, r10 lwz r10, (instruction_counter - PAGE_OFFSET)@l(0) addi r10, r10, -1 @@ -396,11 +385,12 @@ InstructionBreakpoint: mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) + __HEAD . = 0x2000 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions @@ -510,14 +500,10 @@ FixupDAR:/* Entry point for dcbx workaround. 
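The workaround named in this hunk heading exists because, on these cores, the cache-control instructions (dcbst, dcbf, dcbi, icbi, ...) can fault without leaving a usable address in DAR, so the handler re-reads the faulting instruction and recomputes the effective address from its register fields. A minimal sketch of that computation, assuming a read_gpr() helper; the field extraction follows the standard X-form layout (RA in bits 11-15, RB in bits 16-20, IBM bit numbering):

/* Editor's illustration of recomputing a dcbx effective address. */
#include <stdint.h>

uint32_t read_gpr(unsigned int gpr);	/* assumed helper for the sketch */

static uint32_t dcbx_effective_address(uint32_t insn)
{
	unsigned int ra = (insn >> 16) & 0x1f;	/* X-form RA field */
	unsigned int rb = (insn >> 11) & 0x1f;	/* X-form RB field */
	uint32_t base = ra ? read_gpr(ra) : 0;	/* RA == 0 reads as zero */

	return base + read_gpr(rb);
}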
*/ 152: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ -#ifdef CONFIG_VMAP_STACK mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) mfspr r10, SPRN_DSISR stw r10, DSISR(r11) -#else - mtdar r10 /* save fault EA to DAR */ -#endif mfspr r10,SPRN_M_TW b DARFixed /* Go back to normal TLB handling */ @@ -819,7 +805,7 @@ EXPORT_SYMBOL(empty_zero_page) swapper_pg_dir: .space PGD_TABLE_SIZE -/* Room for two PTE table poiners, usually the kernel and current user +/* Room for two PTE table pointers, usually the kernel and current user * pointer to their respective root page table (pgdir). */ .globl abatron_pteptrs diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 565e84e20a72..065178f19a3d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,6 +31,7 @@ #include #include #include +#include #include "head_32.h" @@ -239,7 +240,7 @@ __secondary_hold_acknowledge: /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_async_exception) when done. */ - EXCEPTION(0x100, Reset, unknown_async_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception) /* Machine check */ /* @@ -255,40 +256,28 @@ __secondary_hold_acknowledge: * pointer when we take an exception from supervisor mode.) * -- paulus. */ - . = 0x200 - DO_KVM 0x200 -MachineCheck: + START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck) EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK mtspr SPRN_SPRG_SCRATCH2,r1 mfspr r1, SPRN_SPRG_THREAD lwz r1, RTAS_SP(r1) cmpwi cr1, r1, 0 bne cr1, 7f mfspr r1, SPRN_SPRG_SCRATCH2 -#else - mfspr r11, SPRN_SPRG_THREAD - lwz r11, RTAS_SP(r11) - cmpwi cr1, r11, 0 - bne cr1, 7f -#endif #endif /* CONFIG_PPC_CHRP */ - EXCEPTION_PROLOG_1 for_rtas=1 -7: EXCEPTION_PROLOG_2 - addi r3,r1,STACK_FRAME_OVERHEAD + EXCEPTION_PROLOG_1 +7: EXCEPTION_PROLOG_2 0x200 MachineCheck #ifdef CONFIG_PPC_CHRP - beq cr1, machine_check_tramp + beq cr1, 1f twi 31, 0, 0 -#else - b machine_check_tramp #endif +1: prepare_transfer_to_handler + bl machine_check_exception + b interrupt_return /* Data access exception. */ - . = 0x300 - DO_KVM 0x300 -DataAccess: -#ifdef CONFIG_VMAP_STACK + START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess) #ifdef CONFIG_PPC_BOOK3S_604 BEGIN_MMU_FTR_SECTION mtspr SPRN_SPRG_SCRATCH2,r10 @@ -309,30 +298,20 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif 1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 - b handle_page_fault_tramp_1 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG handle_dar_dsisr=1 - get_and_save_dar_dsisr_on_stack r4, r5, r11 -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h - bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ - bl hash_page - b handle_page_fault_tramp_1 -MMU_FTR_SECTION_ELSE -#endif - b handle_page_fault_tramp_2 -#ifdef CONFIG_PPC_BOOK3S_604 -ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 + prepare_transfer_to_handler + lwz r5, _DSISR(r11) + andis. r0, r5, DSISR_DABRMATCH@h + bne- 1f + bl do_page_fault + b interrupt_return +1: bl do_break + REST_NVGPRS(r1) + b interrupt_return + /* Instruction access exception. */ - . 
= 0x400 - DO_KVM 0x400 -InstructionAccess: -#ifdef CONFIG_VMAP_STACK + START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess) mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD @@ -352,43 +331,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) andi. r11, r11, MSR_PR EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 -#else /* CONFIG_VMAP_STACK */ - EXCEPTION_PROLOG - andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ - beq 1f /* if so, try to put a PTE */ - li r3,0 /* into the hash table */ - mr r4,r12 /* SRR0 is fault address */ -#ifdef CONFIG_PPC_BOOK3S_604 -BEGIN_MMU_FTR_SECTION - bl hash_page -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) -#endif -#endif /* CONFIG_VMAP_STACK */ + EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ stw r5, _DSISR(r11) stw r12, _DAR(r11) - EXC_XFER_LITE(0x400, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return /* External interrupt */ - EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ) /* Alignment exception */ - . = 0x600 - DO_KVM 0x600 -Alignment: - EXCEPTION_PROLOG handle_dar_dsisr=1 - save_dar_dsisr_on_stack r4, r5, r11 - addi r3,r1,STACK_FRAME_OVERHEAD - b alignment_exception_tramp + START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment) + EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1 + prepare_transfer_to_handler + bl alignment_exception + REST_NVGPRS(r1) + b interrupt_return /* Program check exception */ - EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) + START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck) + EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck + prepare_transfer_to_handler + bl program_check_exception + REST_NVGPRS(r1) + b interrupt_return /* Floating-point unavailable */ - . = 0x800 - DO_KVM 0x800 -FPUnavailable: + START_EXCEPTION(0x800, FPUnavailable) #ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* @@ -397,30 +368,29 @@ BEGIN_FTR_SECTION */ b ProgramCheck END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) - EXCEPTION_PROLOG + EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable beq 1f bl load_up_fpu /* if from user, just load it up */ b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler + bl kernel_fp_unavailable_exception + b interrupt_return #else b ProgramCheck #endif /* Decrementer */ - EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xa00, Trap_0a, unknown_exception) + EXCEPTION(0xb00, Trap_0b, unknown_exception) /* System call */ - . = 0xc00 - DO_KVM 0xc00 -SystemCall: - SYSCALL_ENTRY 0xc00 + START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall) + SYSCALL_ENTRY INTERRUPT_SYSCALL - EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception) + EXCEPTION(0xe00, Trap_0e, unknown_exception) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -430,19 +400,18 @@ SystemCall: * non-altivec kernel running on a machine with altivec just * by executing an altivec instruction. */ - . 
= 0xf00 - DO_KVM 0xf00 + START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap) b PerformanceMonitor - . = 0xf20 - DO_KVM 0xf20 + START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap) b AltiVecUnavailable + __HEAD /* * Handle TLB miss for instruction on 603/603e. * Note: we get an alternate set of r0 - r3 to use automatically. */ - . = 0x1000 + . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: /* * r0: scratch @@ -508,7 +477,7 @@ InstructionAddressInvalid: /* * Handle TLB miss for DATA Load operation on 603/603e */ - . = 0x1100 + . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: /* * r0: scratch @@ -586,7 +555,7 @@ DataAddressInvalid: /* * Handle TLB miss for DATA Store on 603/603e */ - . = 0x1200 + . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: /* * r0: scratch @@ -650,57 +619,39 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define TAUException unknown_async_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) - EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception) + EXCEPTION(0x1400, SMI, SMIException) + EXCEPTION(0x1500, Trap_15, unknown_exception) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception) + EXCEPTION(0x1700, Trap_17, TAUException) + EXCEPTION(0x1800, Trap_18, unknown_exception) + EXCEPTION(0x1900, Trap_19, unknown_exception) + EXCEPTION(0x1a00, Trap_1a, unknown_exception) + EXCEPTION(0x1b00, Trap_1b, unknown_exception) + EXCEPTION(0x1c00, Trap_1c, unknown_exception) + EXCEPTION(0x1d00, Trap_1d, unknown_exception) + EXCEPTION(0x1e00, Trap_1e, unknown_exception) + EXCEPTION(0x1f00, Trap_1f, unknown_exception) + EXCEPTION(0x2000, RunMode, RunModeException) + EXCEPTION(0x2100, Trap_21, unknown_exception) + EXCEPTION(0x2200, Trap_22, unknown_exception) + EXCEPTION(0x2300, Trap_23, 
unknown_exception) + EXCEPTION(0x2400, Trap_24, unknown_exception) + EXCEPTION(0x2500, Trap_25, unknown_exception) + EXCEPTION(0x2600, Trap_26, unknown_exception) + EXCEPTION(0x2700, Trap_27, unknown_exception) + EXCEPTION(0x2800, Trap_28, unknown_exception) + EXCEPTION(0x2900, Trap_29, unknown_exception) + EXCEPTION(0x2a00, Trap_2a, unknown_exception) + EXCEPTION(0x2b00, Trap_2b, unknown_exception) + EXCEPTION(0x2c00, Trap_2c, unknown_exception) + EXCEPTION(0x2d00, Trap_2d, unknown_exception) + EXCEPTION(0x2e00, Trap_2e, unknown_exception) + EXCEPTION(0x2f00, Trap_2f, unknown_exception) + __HEAD . = 0x3000 -machine_check_tramp: - EXC_XFER_STD(0x200, machine_check_exception) - -alignment_exception_tramp: - EXC_XFER_STD(0x600, alignment_exception) - -handle_page_fault_tramp_1: -#ifdef CONFIG_VMAP_STACK - EXCEPTION_PROLOG_2 handle_dar_dsisr=1 -#endif - lwz r5, _DSISR(r11) - /* fall through */ -handle_page_fault_tramp_2: - andis. r0, r5, DSISR_DABRMATCH@h - bne- 1f - EXC_XFER_LITE(0x300, handle_page_fault) -1: EXC_XFER_STD(0x300, do_break) - -#ifdef CONFIG_VMAP_STACK #ifdef CONFIG_PPC_BOOK3S_604 .macro save_regs_thread thread stw r0, THR0(\thread) @@ -775,26 +726,31 @@ fast_hash_page_return: rfi #endif /* CONFIG_PPC_BOOK3S_604 */ -stack_overflow: +#ifdef CONFIG_VMAP_STACK vmap_stack_overflow_exception #endif + __HEAD AltiVecUnavailable: - EXCEPTION_PROLOG + EXCEPTION_PROLOG 0xf20 AltiVecUnavailable #ifdef CONFIG_ALTIVEC beq 1f bl load_up_altivec /* if from user, just load it up */ b fast_exception_return #endif /* CONFIG_ALTIVEC */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0xf20, altivec_unavailable_exception) +1: prepare_transfer_to_handler + bl altivec_unavailable_exception + b interrupt_return + __HEAD PerformanceMonitor: - EXCEPTION_PROLOG - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_STD(0xf00, performance_monitor_exception) + EXCEPTION_PROLOG 0xf00 PerformanceMonitor + prepare_transfer_to_handler + bl performance_monitor_exception + b interrupt_return + __HEAD /* * This code is jumped to from the startup code to copy * the kernel image to physical address PHYSICAL_START. diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 47857795f50a..f82470091697 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -44,7 +44,7 @@ END_BTB_FLUSH_SECTION #endif -#define NORMAL_EXCEPTION_PROLOG(intno) \ +#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ @@ -53,6 +53,8 @@ END_BTB_FLUSH_SECTION mfspr r11, SPRN_SRR1; \ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ andi. r11, r11, MSR_PR; /* check whether user or kernel */\ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \ + mtmsr r11; \ mr r11, r1; \ beq 1f; \ BOOKE_CLEAR_BTB(r11) \ @@ -76,12 +78,39 @@ END_BTB_FLUSH_SECTION stw r1, 0(r11); \ mr r1, r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) 
*/\ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno + +.macro COMMON_EXCEPTION_PROLOG_END trapno + stw r0,GPR0(r1) + lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10, r10, STACK_FRAME_REGS_MARKER@l + stw r10, 8(r1) + li r10, \trapno + stw r10,_TRAP(r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + SAVE_NVGPRS(r1) + stw r2,GPR2(r1) + stw r12,_NIP(r1) + stw r9,_MSR(r1) + mfctr r10 + mfspr r2,SPRN_SPRG_THREAD + stw r10,_CTR(r1) + tovirt(r2, r2) + mfspr r10,SPRN_XER + addi r2, r2, -THREAD + stw r10,_XER(r1) + addi r3,r1,STACK_FRAME_OVERHEAD +.endm + +.macro prepare_transfer_to_handler +#ifdef CONFIG_E500 + andi. r12,r9,MSR_PR + bne 777f + bl prepare_transfer_to_handler +777: +#endif +.endm .macro SYSCALL_ENTRY trapno intno srr1 mfspr r10, SPRN_SPRG_THREAD @@ -180,7 +209,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * registers as the normal prolog above. Instead we use a portion of the * critical/machine check exception stack at low physical addresses. */ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -192,6 +221,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ BOOKE_CLEAR_BTB(r10) \ andi. r11,r11,MSR_PR; \ + LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \ + mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -221,16 +252,44 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) stw r1,0(r11); \ mr r1,r11; \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) 
*/\ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) + COMMON_EXCEPTION_PROLOG_END trapno -#define CRITICAL_EXCEPTION_PROLOG(intno) \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) -#define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) -#define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ +#define SAVE_xSRR(xSRR) \ + mfspr r0,SPRN_##xSRR##0; \ + stw r0,_##xSRR##0(r1); \ + mfspr r0,SPRN_##xSRR##1; \ + stw r0,_##xSRR##1(r1) + + +.macro SAVE_MMU_REGS +#ifdef CONFIG_PPC_BOOK3E_MMU + mfspr r0,SPRN_MAS0 + stw r0,MAS0(r1) + mfspr r0,SPRN_MAS1 + stw r0,MAS1(r1) + mfspr r0,SPRN_MAS2 + stw r0,MAS2(r1) + mfspr r0,SPRN_MAS3 + stw r0,MAS3(r1) + mfspr r0,SPRN_MAS6 + stw r0,MAS6(r1) +#ifdef CONFIG_PHYS_64BIT + mfspr r0,SPRN_MAS7 + stw r0,MAS7(r1) +#endif /* CONFIG_PHYS_64BIT */ +#endif /* CONFIG_PPC_BOOK3E_MMU */ +#ifdef CONFIG_44x + mfspr r0,SPRN_MMUCR + stw r0,MMUCR(r1) +#endif +.endm + +#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1) +#define DEBUG_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1) +#define MCHECK_EXCEPTION_PROLOG(trapno) \ + EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \ SPRN_MCSRR0, SPRN_MCSRR1) /* @@ -257,44 +316,34 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) .align 5; \ label: -#define EXCEPTION(n, intno, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) + NORMAL_EXCEPTION_PROLOG(n, intno); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b interrupt_return #define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG(intno); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - crit_transfer_to_handler, ret_from_crit_exc) + CRITICAL_EXCEPTION_PROLOG(n, intno); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_crit_exc #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ - MCHECK_EXCEPTION_PROLOG; \ + MCHECK_EXCEPTION_PROLOG(n); \ mfspr r5,SPRN_ESR; \ stw r5,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - mcheck_transfer_to_handler, ret_from_mcheck_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ - ret_from_except) + SAVE_xSRR(DSRR); \ + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl hdlr; \ + b ret_from_mcheck_exc /* Check for a single step debug exception while in an exception * handler before state has been saved. 
This is to catch the case @@ -311,7 +360,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) */ #define DEBUG_DEBUG_EXCEPTION \ START_EXCEPTION(DebugDebug); \ - DEBUG_EXCEPTION_PROLOG; \ + DEBUG_EXCEPTION_PROLOG(2000); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -360,12 +409,16 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* continue normal handling for a debug exception... */ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) + SAVE_xSRR(CSRR); \ + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_debug_exc #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG(DEBUG); \ + CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -414,58 +467,71 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) + SAVE_MMU_REGS; \ + SAVE_xSRR(SRR); \ + prepare_transfer_to_handler; \ + bl DebugException; \ + b ret_from_crit_exc #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ stw r4, _DEAR(r11); \ - EXC_XFER_LITE(0x0300, handle_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ + NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ - EXC_XFER_LITE(0x0400, handle_page_fault) + prepare_transfer_to_handler; \ + bl do_page_fault; \ + b interrupt_return #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ + NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0600, alignment_exception) + prepare_transfer_to_handler; \ + bl alignment_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG(PROGRAM); \ + NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x0700, program_check_exception) + prepare_transfer_to_handler; \ + bl program_check_exception; \ + REST_NVGPRS(r1); \ + b interrupt_return #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ + NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_LITE(0x0900, timer_interrupt) + prepare_transfer_to_handler; \ + bl timer_interrupt; \ + b interrupt_return #define 
FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ + NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ -1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) +1: prepare_transfer_to_handler; \ + bl kernel_fp_unavailable_exception; \ + b interrupt_return #else /* __ASSEMBLY__ */ struct exception_regs { @@ -481,7 +547,6 @@ struct exception_regs { unsigned long csrr1; unsigned long dsrr0; unsigned long dsrr1; - unsigned long saved_ksp_limit; }; /* ensure this structure is always sized to a multiple of the stack alignment */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 3f4a40cccef5..a1a5c3f10dc4 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -113,7 +113,7 @@ _ENTRY(_start); 1: /* - * We have the runtime (virutal) address of our base. + * We have the runtime (virtual) address of our base. * We calculate our shift of offset from a 64M page. * We could map the 64M page we belong to at PAGE_OFFSET and * get going from there. @@ -363,23 +363,26 @@ interrupt_base: /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) + NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) andis. r10,r5,(ESR_ILK|ESR_DLK)@h bne 1f - EXC_XFER_LITE(0x0300, handle_page_fault) + prepare_transfer_to_handler + bl do_page_fault + b interrupt_return 1: - addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x0300, CacheLockingException) + prepare_transfer_to_handler + bl CacheLockingException + b interrupt_return /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -391,8 +394,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) #endif /* System Call Interrupt */ @@ -400,16 +402,14 @@ interrupt_base: SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1 /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -497,7 +497,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission/valid mismach */ + bne 2f /* Bail if permission/valid mismatch */ /* Jump to common tlb load */ b finish_tlb_load @@ -592,7 +592,7 @@ END_BTB_FLUSH_SECTION #endif #endif - bne 2f /* Bail if permission mismach */ + bne 2f /* Bail if permission mismatch */ /* Jump to common TLB load point */ b finish_tlb_load @@ -614,38 +614,44 @@ END_BTB_FLUSH_SECTION #ifdef CONFIG_SPE /* SPE Unavailable */ 
START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return -1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_LITE(0x2010, KernelSPE) +1: prepare_transfer_to_handler + bl KernelSPE + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_STD) + START_EXCEPTION(SPEFloatingPointData) + NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA) + prepare_transfer_to_handler + bl SPEFloatingPointException + REST_NVGPRS(r1) + b interrupt_return /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_STD) + START_EXCEPTION(SPEFloatingPointRound) + NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND) + prepare_transfer_to_handler + bl SPEFloatingPointRoundException + REST_NVGPRS(r1) + b interrupt_return #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_STD) - EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception) #endif /* CONFIG_SPE_POSSIBLE */ /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ - performance_monitor_exception, EXC_XFER_STD) + performance_monitor_exception) - EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception) CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ CriticalDoorbell, unknown_exception) @@ -660,10 +666,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception) interrupt_end: @@ -854,7 +860,7 @@ KernelSPE: lwz r5,_NIP(r1) bl printk #endif - b ret_from_except + b interrupt_return #ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" #endif diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 867ee4aa026a..675d1f66ab72 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -141,7 +141,7 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, { struct instruction_op op; - if (__get_user_instr_inatomic(*instr, (void __user *)regs->nip)) + if (__get_user_instr(*instr, (void __user *)regs->nip)) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 69df840f7253..13cad9297d82 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -145,9 +145,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * we are called with DR/IR still off and r2 containing physical - * address of current. R11 points to the exception frame (physical - * address). 
We have to preserve r10. + * R11 points to the exception frame. We have to preserve r10. */ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */ @@ -166,11 +164,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_HID0 andis. r9,r9,HID0_NAP@h beq 1f -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_msscr0@ha -#else - addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha -#endif lwz r9,nap_save_msscr0@l(r9) mtspr SPRN_MSSCR0, r9 sync @@ -178,15 +172,11 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) BEGIN_FTR_SECTION -#ifdef CONFIG_VMAP_STACK addis r9, r11, nap_save_hid1@ha -#else - addis r9,r11,(nap_save_hid1-KERNELBASE)@ha -#endif lwz r9,nap_save_hid1@l(r9) mtspr SPRN_HID1, r9 END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) .data diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index f9e6d83e6720..abb719b21cae 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -209,4 +209,8 @@ _GLOBAL(power4_idle_nap) mtmsrd r7 isync b 1b + + .globl power4_idle_nap_return +power4_idle_nap_return: + blr #endif diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 72c85b6f3898..9e1bc4502c50 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -74,20 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) /* * Return from NAP/DOZE mode, restore some CPU specific registers, - * r2 containing physical address of current. - * r11 points to the exception frame (physical address). + * r2 containing address of current. + * r11 points to the exception frame. * We have to preserve r10. */ _GLOBAL(power_save_ppc32_restore) lwz r9,_LINK(r11) /* interrupted in e500_idle */ stw r9,_NIP(r11) /* make it do a blr */ - -#ifdef CONFIG_SMP - lwz r11,TASK_CPU(r2) /* get cpu number * 4 */ - slwi r11,r11,2 -#else - li r11,0 -#endif - - b transfer_to_handler_cont + blr _ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index c475a229a42a..e4559f8914eb 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -20,6 +20,10 @@ #include #include +#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32) +unsigned long global_dbcr0[NR_CPUS]; +#endif + typedef long (*syscall_fn)(long, long, long, long, long, long); /* Has to run notrace because it is entered not completely "reconciled" */ @@ -29,20 +33,24 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; + kuep_lock(); +#ifdef CONFIG_PPC32 + kuap_save_and_lock(regs); +#endif + regs->orig_gpr3 = r3; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + trace_hardirqs_off(); /* finish reconciling */ + CT_WARN_ON(ct_state() == CONTEXT_KERNEL); user_exit_irqoff(); - trace_hardirqs_off(); /* finish reconciling */ - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY @@ -69,9 +77,7 @@ notrace long system_call_exception(long r3, long r4, long r5, isync(); } else #endif -#ifdef CONFIG_PPC64 - kuap_check_amr(); -#endif + kuap_assert_locked(); booke_restore_dbcr0(); @@ -247,9 +253,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 - 
kuap_check_amr(); -#endif + kuap_assert_locked(); regs->result = r3; @@ -344,16 +348,13 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, account_cpu_user_exit(); -#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */ - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ + /* Restore user access locks last */ kuap_user_restore(regs); -#endif + kuep_unlock(); + return ret; } -#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) { unsigned long ti_flags; @@ -363,7 +364,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(!FULL_REGS(regs)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -371,9 +371,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned * We don't need to restore AMR on the way back to userspace for KUAP. * AMR can only have been unlocked if we interrupted the kernel. */ -#ifdef CONFIG_PPC64 - kuap_check_amr(); -#endif + kuap_assert_locked(); local_irq_save(flags); @@ -392,7 +390,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned ti_flags = READ_ONCE(current_thread_info()->flags); } - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely((ti_flags & _TIF_RESTORE_TM))) { restore_tm_state(regs); @@ -427,12 +425,9 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned account_cpu_user_exit(); - /* - * We do this at the end so that we do context switch with KERNEL AMR - */ -#ifdef CONFIG_PPC64 + /* Restore user access locks last */ kuap_user_restore(regs); -#endif + return ret; } @@ -442,25 +437,20 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign { unsigned long flags; unsigned long ret = 0; -#ifdef CONFIG_PPC64 - unsigned long amr; -#endif + unsigned long kuap; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); BUG_ON(regs->msr & MSR_PR); - BUG_ON(!FULL_REGS(regs)); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != 0x700) + if (TRAP(regs) != INTERRUPT_PROGRAM) CT_WARN_ON(ct_state() == CONTEXT_USER); -#ifdef CONFIG_PPC64 - amr = kuap_get_and_check_amr(); -#endif + kuap = kuap_get_and_assert_locked(); if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); @@ -498,14 +488,11 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign #endif /* - * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr, - * which would cause Read-After-Write stalls. Hence, we take the AMR - * value from the check above. + * 64s does not want to mfspr(SPRN_AMR) here, because this comes after + * mtmsr, which would cause Read-After-Write stalls. Hence, take the + * AMR value from the check above. 
*/ -#ifdef CONFIG_PPC64 - kuap_kernel_restore(regs, amr); -#endif + kuap_kernel_restore(regs, kuap); return ret; } -#endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c00214a4355c..57d6b85e9b96 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -72,8 +72,7 @@ static void iommu_debugfs_del(struct iommu_table *tbl) sprintf(name, "%08lx", tbl->it_index); liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - if (liobn_entry) - debugfs_remove(liobn_entry); + debugfs_remove(liobn_entry); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} @@ -297,6 +296,15 @@ static unsigned long iommu_range_alloc(struct device *dev, pass++; goto again; + } else if (pass == tbl->nr_pools + 1) { + /* Last resort: try largepool */ + spin_unlock(&pool->lock); + pool = &tbl->large_pool; + spin_lock(&pool->lock); + pool->hint = pool->start; + pass++; + goto again; + } else { /* Give up */ spin_unlock_irqrestore(&(pool->lock), flags); @@ -719,7 +727,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, { unsigned long sz; static int welcomed = 0; - struct page *page; unsigned int i; struct iommu_pool *p; @@ -728,11 +735,11 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz)); - if (!page) - panic("iommu_init_table: Can't allocate %ld bytes\n", sz); - tbl->it_map = page_address(page); - memset(tbl->it_map, 0, sz); + tbl->it_map = vzalloc_node(sz, nid); + if (!tbl->it_map) { + pr_err("%s: Can't allocate %ld bytes\n", __func__, sz); + return NULL; + } iommu_table_reserve_pages(tbl, res_start, res_end); @@ -774,8 +781,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, static void iommu_table_free(struct kref *kref) { - unsigned long bitmap_sz; - unsigned int order; struct iommu_table *tbl; tbl = container_of(kref, struct iommu_table, it_kref); @@ -796,12 +801,8 @@ static void iommu_table_free(struct kref *kref) if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs\n", __func__); - /* calculate bitmap size in bytes */ - bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); - /* free bitmap */ - order = get_order(bitmap_sz); - free_pages((unsigned long) tbl->it_map, order); + vfree(tbl->it_map); /* free table */ kfree(tbl); @@ -897,6 +898,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + size_t size_io = size; size = PAGE_ALIGN(size); order = get_order(size); @@ -923,8 +925,9 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; - io_order = get_iommu_order(size, tbl); + size_io = IOMMU_PAGE_ALIGN(size_io, tbl); + nio_pages = size_io >> tbl->it_page_shift; + io_order = get_iommu_order(size_io, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); if (mapping == DMA_MAPPING_ERROR) { @@ -939,10 +942,9 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { if (tbl) { - unsigned int nio_pages; + size_t size_io = IOMMU_PAGE_ALIGN(size, tbl); + unsigned int nio_pages = size_io >> tbl->it_page_shift; - size = PAGE_ALIGN(size); - nio_pages = size >> 
tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -1096,7 +1098,7 @@ int iommu_take_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); iommu_table_release_pages(tbl); @@ -1124,7 +1126,7 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); memset(tbl->it_map, 0, sz); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d71fd10a1dd4..72cb45393ef2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,82 +104,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -#ifdef CONFIG_PPC_BOOK3E - -/* This is called whenever we are re-enabling interrupts - * and returns either 0 (nothing to do) or 500/900/280 if - * there's an EE, DEC or DBELL to generate. - * - * This is called in two contexts: From arch_local_irq_restore() - * before soft-enabling interrupts, and from the exception exit - * path when returning from an interrupt from a soft-disabled to - * a soft enabled context. In both case we have interrupts hard - * disabled. - * - * We take care of only clearing the bits we handled in the - * PACA irq_happened field since we can only re-emit one at a - * time and we don't want to "lose" one. - */ -notrace unsigned int __check_irq_replay(void) -{ - /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function - */ - unsigned char happened = local_paca->irq_happened; - - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - - if (happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - return 0x900; - } - - if (happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - return 0x500; - } - - if (happened & PACA_IRQ_DBELL) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - return 0x280; - } - - if (happened & PACA_IRQ_HARD_DIS) - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* There should be nothing left ! */ - BUG_ON(local_paca->irq_happened != 0); - - return 0; -} - -/* - * This is specifically called by assembly code to re-enable interrupts - * if they are currently disabled. This is typically called before - * schedule() or do_signal() when returning to userspace. We do it - * in C to avoid the burden of dealing with lockdep etc... - * - * NOTE: This is called with interrupts hard disabled but not marked - * as such in paca->irq_happened, so we need to resync this. 
- */ -void notrace restore_interrupts(void) -{ - if (irqs_disabled()) { - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - local_irq_enable(); - } else - __hard_irq_enable(); -} - -#endif /* CONFIG_PPC_BOOK3E */ - void replay_soft_interrupts(void) { struct pt_regs regs; @@ -218,7 +142,7 @@ void replay_soft_interrupts(void) */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = 0xe60; + regs.trap = INTERRUPT_HMI; handle_hmi_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -226,7 +150,7 @@ void replay_soft_interrupts(void) if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = 0x900; + regs.trap = INTERRUPT_DECREMENTER; timer_interrupt(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -234,7 +158,7 @@ void replay_soft_interrupts(void) if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = 0x500; + regs.trap = INTERRUPT_EXTERNAL; do_IRQ(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -242,10 +166,7 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) - regs.trap = 0x280; - else - regs.trap = 0xa00; + regs.trap = INTERRUPT_DOORBELL; doorbell_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -254,7 +175,7 @@ void replay_soft_interrupts(void) /* Book3E does not support soft-masking PMI interrupts */ if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = 0xf00; + regs.trap = INTERRUPT_PERFMON; performance_monitor_exception(®s); if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) hard_irq_disable(); @@ -282,7 +203,7 @@ static inline void replay_soft_interrupts_irqrestore(void) * and re-locking AMR but we shouldn't get here in the first place, * hence the warning. */ - kuap_check_amr(); + kuap_assert_locked(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); @@ -667,6 +588,47 @@ static inline void check_stack_overflow(void) } } +static __always_inline void call_do_softirq(const void *sp) +{ + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_softirq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r1, %[sp] ;" + "bl %[callee] ;" + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_irq(struct pt_regs *regs) { unsigned int irq; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 144858027fa3..ce87dc5ea23c 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)(unsigned long)entry->code; + struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) - patch_branch(addr, entry->target, 0); + patch_branch(addr, jump_entry_target(entry), 0); else patch_instruction(addr, ppc_inst(PPC_INST_NOP)); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 409080208a6c..7dd2ad3603ad 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -376,7 +376,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) } /* - * This function does PowerPC specific procesing for interfacing to gdb. + * This function does PowerPC specific processing for interfacing to gdb. */ int kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_in_buffer, char *remcom_out_buffer, diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f061e06e9f51..8b2c1a8553a0 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -15,6 +15,7 @@ #include #include #include +#include #undef DEBUG @@ -34,6 +35,7 @@ static struct legacy_serial_info { unsigned int clock; int irq_check_parent; phys_addr_t taddr; + void __iomem *early_addr; } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS]; static const struct of_device_id legacy_serial_parents[] __initconst = { @@ -325,17 +327,16 @@ static void __init setup_legacy_serial_console(int console) { struct legacy_serial_info *info = &legacy_serial_infos[console]; struct plat_serial8250_port *port = &legacy_serial_ports[console]; - void __iomem *addr; unsigned int stride; stride = 1 << port->regshift; /* Check if a translated MMIO address has been found */ if (info->taddr) { - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) + info->early_addr = early_ioremap(info->taddr, 0x1000); + if (info->early_addr == NULL) return; - udbg_uart_init_mmio(addr, stride); + udbg_uart_init_mmio(info->early_addr, stride); } else { /* Check if it's PIO and we support untranslated PIO */ if (port->iotype == UPIO_PORT && isa_io_special) @@ -353,6 +354,30 @@ static void __init setup_legacy_serial_console(int console) udbg_uart_setup(info->speed, info->clock); } +static int __init ioremap_legacy_serial_console(void) +{ + struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; + struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + void __iomem *vaddr; + + if (legacy_serial_console < 0) + return 0; + + if (!info->early_addr) + return 0; + + vaddr = ioremap(info->taddr, 0x1000); + if (WARN_ON(!vaddr)) + return -ENOMEM; + + udbg_uart_init_mmio(vaddr, 1 << port->regshift); + early_iounmap(info->early_addr, 0x1000); + info->early_addr = NULL; 
+ + return 0; +} +early_initcall(ioremap_legacy_serial_console); + /* * This is called very early, as part of setup_system() or eventually * setup_arch(), basically before anything else in this file. This function diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 11f0cae086ed..9a3c2a84a2ac 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -40,7 +40,7 @@ static struct irq_work mce_ue_event_irq_work = { .func = machine_check_ue_irq_work, }; -DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); @@ -131,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled, * Populate the mce error_type and type-specific error_type. */ mce_set_error_info(mce, mce_err); + if (mce->error_type == MCE_ERROR_TYPE_UE) + mce->u.ue_error.ignore_event = mce_err->ignore_event; if (!addr) return; @@ -159,7 +161,6 @@ void save_mce_event(struct pt_regs *regs, long handled, if (phys_addr != ULONG_MAX) { mce->u.ue_error.physical_address_provided = true; mce->u.ue_error.physical_address = phys_addr; - mce->u.ue_error.ignore_event = mce_err->ignore_event; machine_check_ue_event(mce); } } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 717e658b90fd..6a076bef2932 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,45 +27,6 @@ .text -/* - * We store the saved ksp_limit in the unused part - * of the STACK_FRAME_OVERHEAD - */ -_GLOBAL(call_do_softirq) - mflr r0 - stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r3, THREAD+KSP_LIMIT(r2) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - stw r10,8(r1) - bl __do_softirq - lwz r10,8(r1) - lwz r1,0(r1) - lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) - mtlr r0 - blr - -/* - * void call_do_irq(struct pt_regs *regs, void *sp); - */ -_GLOBAL(call_do_irq) - mflr r0 - stw r0,4(r1) - lwz r10,THREAD+KSP_LIMIT(r2) - stw r4, THREAD+KSP_LIMIT(r2) - stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - stw r10,8(r1) - bl __do_irq - lwz r10,8(r1) - lwz r1,0(r1) - lwz r0,4(r1) - stw r10,THREAD+KSP_LIMIT(r2) - mtlr r0 - blr - /* * This returns the high 64 bits of the product of two 64-bit numbers. 
*/ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 070465825c21..4b761a18a74d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -27,28 +27,6 @@ .text -_GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) - mr r1,r3 - bl __do_softirq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - -_GLOBAL(call_do_irq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) - mr r1,r4 - bl __do_irq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index a211b0253cdb..fab84024650c 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -14,6 +14,7 @@ #include #include #include +#include static LIST_HEAD(module_bug_list); @@ -88,12 +89,28 @@ int module_finalize(const Elf_Ehdr *hdr, } #ifdef MODULES_VADDR -void *module_alloc(unsigned long size) +static __always_inline void * +__module_alloc(unsigned long size, unsigned long start, unsigned long end) { - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } + +void *module_alloc(unsigned long size) +{ + unsigned long limit = (unsigned long)_etext - SZ_32M; + void *ptr = NULL; + + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + /* First try within 32M limit from _etext to avoid branch trampolines */ + if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) + ptr = __module_alloc(size, limit, MODULES_END); + + if (!ptr) + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + + return ptr; +} #endif diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 7f7cdbeacd1a..cdf87086fa33 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } } +static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); + addr++; + + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); +} + /* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. */ -static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, @@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t * ___PPC_RS(reg) | (val & 0xffff))); } +static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + if (IS_ENABLED(CONFIG_PPC64)) + patch_imm64_load_insns(val, reg, addr); + else + patch_imm32_load_insns(val, reg, addr); +} + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; @@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. 
load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); + patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_as_u64(temp), 4, buff + TMPL_INSN_IDX); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index ff8ba4d3824d..19ea3312403c 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -9,6 +9,16 @@ #include #include +#ifdef CONFIG_PPC64 +#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) +#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop +#else +#define SAVE_30GPRS(base) stmw r2, GPR2(base) +#define REST_30GPRS(base) lmw r2, GPR2(base) +#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop +#endif + #define OPT_SLOT_SIZE 65536 .balign 4 @@ -30,39 +40,41 @@ optinsn_slot: .global optprobe_template_entry optprobe_template_entry: /* Create an in-memory pt_regs */ - stdu r1,-INT_FRAME_SIZE(r1) + PPC_STLU r1,-INT_FRAME_SIZE(r1) SAVE_GPR(0,r1) /* Save the previous SP into stack */ addi r0,r1,INT_FRAME_SIZE - std r0,GPR1(r1) - SAVE_10GPRS(2,r1) - SAVE_10GPRS(12,r1) - SAVE_10GPRS(22,r1) + PPC_STL r0,GPR1(r1) + SAVE_30GPRS(r1) /* Save SPRS */ mfmsr r5 - std r5,_MSR(r1) + PPC_STL r5,_MSR(r1) li r5,0x700 - std r5,_TRAP(r1) + PPC_STL r5,_TRAP(r1) li r5,0 - std r5,ORIG_GPR3(r1) - std r5,RESULT(r1) + PPC_STL r5,ORIG_GPR3(r1) + PPC_STL r5,RESULT(r1) mfctr r5 - std r5,_CTR(r1) + PPC_STL r5,_CTR(r1) mflr r5 - std r5,_LINK(r1) + PPC_STL r5,_LINK(r1) mfspr r5,SPRN_XER - std r5,_XER(r1) + PPC_STL r5,_XER(r1) mfcr r5 - std r5,_CCR(r1) + PPC_STL r5,_CCR(r1) +#ifdef CONFIG_PPC64 lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) +#endif /* * We may get here from a module, so load the kernel TOC in r2. * The original TOC gets restored when pt_regs is restored * further below. */ +#ifdef CONFIG_PPC64 ld r2,PACATOC(r13) +#endif .global optprobe_template_op_address optprobe_template_op_address: @@ -70,11 +82,8 @@ optprobe_template_op_address: * Parameters to optimized_callback(): * 1. optimized_kprobe structure in r3 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS + /* 2. pt_regs pointer in r4 */ addi r4,r1,STACK_FRAME_OVERHEAD @@ -92,11 +101,7 @@ optprobe_template_call_handler: .global optprobe_template_insn optprobe_template_insn: /* 2, Pass instruction to be emulated in r4 */ - nop - nop - nop - nop - nop + TEMPLATE_FOR_IMM_LOAD_INSNS .global optprobe_template_call_emulate optprobe_template_call_emulate: @@ -107,20 +112,18 @@ optprobe_template_call_emulate: * All done. * Now, restore the registers... 
*/ - ld r5,_MSR(r1) + PPC_LL r5,_MSR(r1) mtmsr r5 - ld r5,_CTR(r1) + PPC_LL r5,_CTR(r1) mtctr r5 - ld r5,_LINK(r1) + PPC_LL r5,_LINK(r1) mtlr r5 - ld r5,_XER(r1) + PPC_LL r5,_XER(r1) mtxer r5 - ld r5,_CCR(r1) + PPC_LL r5,_CCR(r1) mtcr r5 REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_30GPRS(r1) /* Restore the previous SP */ addi r1,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3231c2df9e26..89e34aa273e2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1117,9 +1117,10 @@ void restore_tm_state(struct pt_regs *regs) regs->msr |= msr_diff; } -#else +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ #define tm_recheckpoint_new_task(new) #define __switch_to_tm(prev, new) +void tm_reclaim_current(uint8_t cause) {} #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) @@ -1255,6 +1256,9 @@ struct task_struct *__switch_to(struct task_struct *prev, */ restore_sprs(old_thread, new_thread); +#ifdef CONFIG_PPC32 + kuap_assert_locked(); +#endif last = _switch(old_thread, new_thread); #ifdef CONFIG_PPC_BOOK3S_64 @@ -1444,11 +1448,9 @@ static void print_msr_bits(unsigned long val) #ifdef CONFIG_PPC64 #define REG "%016lx" #define REGS_PER_LINE 4 -#define LAST_VOLATILE 13 #else #define REG "%08lx" #define REGS_PER_LINE 8 -#define LAST_VOLATILE 12 #endif static void __show_regs(struct pt_regs *regs) @@ -1465,7 +1467,9 @@ static void __show_regs(struct pt_regs *regs) trap = TRAP(regs); if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); - if (trap == 0x200 || trap == 0x300 || trap == 0x600) { + if (trap == INTERRUPT_MACHINE_CHECK || + trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); else @@ -1484,8 +1488,6 @@ static void __show_regs(struct pt_regs *regs) if ((i % REGS_PER_LINE) == 0) pr_cont("\nGPR%02d: ", i); pr_cont(REG " ", regs->gpr[i]); - if (i == LAST_VOLATILE && !FULL_REGS(regs)) - break; } pr_cont("\n"); /* @@ -1688,7 +1690,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } else { /* user thread */ struct pt_regs *regs = current_pt_regs(); - CHECK_FULL_REGS(regs); *childregs = *regs; if (usp) childregs->gpr[1] = usp; @@ -1724,9 +1725,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; -#ifdef CONFIG_PPC32 - p->thread.ksp_limit = (unsigned long)end_of_stack(p); -#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -1796,13 +1794,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->ccr = 0; regs->gpr[1] = sp; - /* - * We have just cleared all the nonvolatile GPRs, so make - * FULL_REGS(regs) return true. This is necessary to allow - * ptrace to examine the thread immediately after exec. - */ - SET_FULL_REGS(regs); - #ifdef CONFIG_PPC32 regs->mq = 0; regs->nip = start; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9a4797d1d40d..fbe9deebc8e1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -65,6 +65,8 @@ #define DBG(fmt...) 
#endif +int *chip_id_lookup_table; + #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; @@ -267,7 +269,7 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; const char *model = of_get_flat_dt_prop(node, "model", NULL); @@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id); int cpu_to_chip_id(int cpu) { struct device_node *np; + int ret = -1, idx; + + idx = cpu / threads_per_core; + if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) + return chip_id_lookup_table[idx]; np = of_get_cpu_node(cpu, NULL); - if (!np) - return -1; + if (np) { + ret = of_get_ibm_chip_id(np); + of_node_put(np); - of_node_put(np); - return of_get_ibm_chip_id(np); + if (chip_id_lookup_table) + chip_id_lookup_table[idx] = ret; + } + + return ret; } EXPORT_SYMBOL(cpu_to_chip_id); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ccf77b985c8f..41ed7e33d897 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2983,7 +2983,7 @@ static void __init fixup_device_tree_efika_add_phy(void) " 0x3 encode-int encode+" " s\" interrupts\" property" " finish-device"); - }; + } /* Check for a PHY device node - if missing then create one and * give it's phandle to the ethernet node */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 6ccffc65ac97..773bcc4ca843 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -111,7 +111,7 @@ static unsigned long get_user_msr(struct task_struct *task) return task->thread.regs->msr | task->thread.fpexc_mode; } -static int set_user_msr(struct task_struct *task, unsigned long msr) +static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { task->thread.regs->msr &= ~MSR_DEBUGCHANGE; task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; @@ -147,7 +147,7 @@ static int set_user_dscr(struct task_struct *task, unsigned long dscr) * We prevent mucking around with the reserved area of trap * which are used internally by the kernel. */ -static int set_user_trap(struct task_struct *task, unsigned long trap) +static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap) { set_trap(task->thread.regs, trap); return 0; @@ -221,17 +221,9 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_PPC64 struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); #endif - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* We have a partial register set. 
Fill 14-31 with bogus values */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); membuf_store(&to_msr, get_user_msr(target)); @@ -252,8 +244,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, target->thread.regs, 0, PT_MSR * sizeof(reg)); @@ -659,6 +649,9 @@ int gpr32_set_common(struct task_struct *target, const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; + if (!kbuf && !user_read_access_begin(u, count)) + return -EFAULT; + pos /= sizeof(reg); count /= sizeof(reg); @@ -667,8 +660,7 @@ int gpr32_set_common(struct task_struct *target, regs[pos++] = *k++; else for (; count > 0 && pos < PT_MSR; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } @@ -676,8 +668,8 @@ int gpr32_set_common(struct task_struct *target, if (count > 0 && pos == PT_MSR) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; @@ -690,24 +682,24 @@ int gpr32_set_common(struct task_struct *target, ++k; } else { for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); regs[pos++] = reg; } for (; count > 0 && pos < PT_TRAP; --count, ++pos) - if (__get_user(reg, u++)) - return -EFAULT; + unsafe_get_user(reg, u++, Efault); } if (count > 0 && pos == PT_TRAP) { if (kbuf) reg = *k++; - else if (__get_user(reg, u++)) - return -EFAULT; + else + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } + if (!kbuf) + user_read_access_end(); kbuf = k; ubuf = u; @@ -715,25 +707,19 @@ int gpr32_set_common(struct task_struct *target, count *= sizeof(reg); return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, (PT_TRAP + 1) * sizeof(reg), -1); + +Efault: + user_read_access_end(); + return -EFAULT; } static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - int i; - if (target->thread.regs == NULL) return -EIO; - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. 
- */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } return gpr32_get_common(target, regset, to, &target->thread.regs->gpr[0]); } @@ -746,7 +732,6 @@ static int gpr32_set(struct task_struct *target, if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, &target->thread.regs->gpr[0]); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 4f3d4ff3728c..0a0a33eb0d28 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -59,7 +59,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); else @@ -81,7 +80,6 @@ long arch_ptrace(struct task_struct *child, long request, if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); else @@ -354,8 +352,6 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, nip)); BUILD_BUG_ON(offsetof(struct pt_regs, msr) != offsetof(struct user_pt_regs, msr)); - BUILD_BUG_ON(offsetof(struct pt_regs, msr) != - offsetof(struct user_pt_regs, msr)); BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != offsetof(struct user_pt_regs, orig_gpr3)); BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index d30b9ad70edc..19c224808982 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -83,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_get_reg(child, index, &tmp); if (ret) @@ -133,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || numReg > PT_FPSCR) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg >= PT_FPR0) { flush_fp_to_thread(child); /* get 64 bit FPR */ @@ -187,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if ((addr & 3) || (index > PT_FPSCR32)) break; - CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { ret = ptrace_put_reg(child, index, data); } else { @@ -226,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, */ if ((addr & 3) || (numReg > PT_FPSCR)) break; - CHECK_FULL_REGS(child->thread.regs); if (numReg < PT_FPR0) { unsigned long freg; ret = ptrace_get_reg(child, numReg, &freg); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 2d33f342a293..6857a5b0a1c3 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -755,11 +755,18 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) return 0; } -#define RMO_READ_BUF_MAX 30 - -/* RTAS Userspace access */ +/** + * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * + * Base + size description of a range of RTAS-addressable memory set + * aside for user space to use as work area(s) for certain RTAS + * functions. User space accesses this region via /dev/mem. 
Apart from + * security policies, the kernel does not arbitrate or serialize + * access to this region, and user space must ensure that concurrent + * users do not interfere with each other. + */ static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) { - seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d126d71ea5bd..6bada744402b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -828,7 +828,6 @@ void rtas_activate_firmware(void) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; #ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls @@ -988,10 +987,10 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { static bool in_rmo_buf(u32 base, u32 end) { return base >= rtas_rmo_buf && - base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) && base <= end && end >= rtas_rmo_buf && - end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); + end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } static bool block_rtas_call(int token, int nargs, @@ -1052,6 +1051,14 @@ static bool block_rtas_call(int token, int nargs, return true; } +static void __init rtas_syscall_filter_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) + rtas_filters[i].token = rtas_token(rtas_filters[i].name); +} + #else static bool block_rtas_call(int token, int nargs, @@ -1060,6 +1067,10 @@ static bool block_rtas_call(int token, int nargs, return false; } +static void __init rtas_syscall_filter_init(void) +{ +} + #endif /* CONFIG_PPC_RTAS_FILTER */ /* We assume to be passed big endian arguments */ @@ -1103,7 +1114,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return -EINVAL; /* Need to handle ibm,suspend_me call specially */ - if (token == ibm_suspend_me_token) { + if (token == rtas_token("ibm,suspend-me")) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1163,9 +1174,6 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; -#ifdef CONFIG_PPC_RTAS_FILTER - int i; -#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. 
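As a side note, the containment test performed by in_rmo_buf() above can be read in isolation: both endpoints of the caller-supplied buffer must land inside the user region. A self-contained sketch, with made-up constants standing in for rtas_rmo_buf and RTAS_USER_REGION_SIZE:

/* Both endpoints of [base, end] must fall inside the region, and base must
 * not exceed end; any straddling or out-of-range request is rejected. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REGION_START 0x10000u   /* stand-in for rtas_rmo_buf */
#define REGION_SIZE  0x8000u    /* stand-in for RTAS_USER_REGION_SIZE */

static bool in_user_region(uint32_t base, uint32_t end)
{
	return base >= REGION_START &&
	       base < (REGION_START + REGION_SIZE) &&
	       base <= end &&
	       end >= REGION_START &&
	       end < (REGION_START + REGION_SIZE);
}

int main(void)
{
	printf("%d\n", in_user_region(0x10100, 0x10200)); /* 1: fully inside */
	printf("%d\n", in_user_region(0x17f00, 0x18100)); /* 0: runs past the end */
	return 0;
}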
@@ -1191,12 +1199,10 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); - ibm_suspend_me_token = rtas_token("ibm,suspend-me"); - } #endif - rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE, + rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE, 0, rtas_region); if (!rtas_rmo_buf) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", @@ -1206,11 +1212,7 @@ void __init rtas_initialize(void) rtas_last_error_token = rtas_token("rtas-last-error"); #endif -#ifdef CONFIG_PPC_RTAS_FILTER - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - rtas_filters[i].token = rtas_token(rtas_filters[i].name); - } -#endif + rtas_syscall_filter_init(); } int __init early_init_dt_scan_rtas(unsigned long node, diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e4e1a94ccf6a..0fdfcdd9d880 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include +#include "setup.h" u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; @@ -250,7 +252,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c static enum stf_barrier_type stf_enabled_flush_types; static bool no_stf_barrier; -bool stf_barrier; +static bool stf_barrier; static int __init handle_no_stf_barrier(char *p) { @@ -541,6 +543,178 @@ void setup_count_cache_flush(void) toggle_branch_cache_flush(enable); } +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; +bool rfi_flush; +static bool entry_flush; +static bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. 
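The boot-time disable switches moved into security.c above all follow the same early_param() shape; a minimal sketch under invented names (the real flags are later consulted by the corresponding setup_*() routines):

#include <linux/init.h>
#include <linux/printk.h>
#include <linux/types.h>

static bool no_example_mitigation;   /* read later by the setup routine */

static int __init handle_no_example_mitigation(char *p)
{
	pr_info("example-mitigation: disabled on command line.\n");
	no_example_mitigation = true;
	return 0;
}
early_param("no_example_mitigation", handle_no_example_mitigation);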
+ */ +} + +void rfi_flush_enable(bool enable) +{ + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +static void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + +static void __ref init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + + l1d_size = ppc64_caches.l1d.size; + + /* + * If there is no d-cache-size property in the device tree, l1d_size + * could be zero. That leads to the loop in the asm wrapping around to + * 2^64-1, and then walking off the end of the fallback area and + * eventually causing a page fault which is fatal. Just default to + * something vaguely sane. + */ + if (!l1d_size) + l1d_size = (64 * 1024); + + limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, + l1d_size, MEMBLOCK_LOW_LIMIT, + limit, NUMA_NO_NODE); + if (!l1d_flush_fallback_area) + panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", + __func__, l1d_size * 2, l1d_size, &limit); + + + for_each_possible_cpu(cpu) { + struct paca_struct *paca = paca_ptrs[cpu]; + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; + paca->l1d_flush_size = l1d_size; + } +} + +void setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: fallback displacement flush available\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: ori type flush available\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: mttrig type flush available\n"); + + enabled_flush_types = types; + + if (!cpu_mitigations_off() && !no_rfi_flush) + rfi_flush_enable(enable); +} + +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int count_cache_flush_set(void *data, u64 val) { @@ -579,5 +753,92 @@ static __init int count_cache_flush_debugfs_init(void) return 0; } device_initcall(count_cache_flush_debugfs_init); + +static int rfi_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 
1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index bee984b1887b..74a98fff2c2f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -69,7 +69,6 @@ #include "setup.h" #ifdef DEBUG -#include #define DBG(fmt...) udbg_printf(fmt) #else #define DBG(fmt...) @@ -829,7 +828,7 @@ static __init void print_system_info(void) } #ifdef CONFIG_SMP -static void smp_setup_pacas(void) +static void __init smp_setup_pacas(void) { int cpu; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8ba49a6bf515..d7c1f92152af 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -164,7 +164,7 @@ void __init irqstack_early_init(void) } #ifdef CONFIG_VMAP_STACK -void *emergency_ctx[NR_CPUS] __ro_after_init; +void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack}; void __init emergency_stack_init(void) { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 560ed8b975e7..b779d25761cf 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -232,10 +232,23 @@ static void cpu_ready_for_interrupts(void) * If we are not in hypervisor mode the job is done once for * the whole partition in configure_exceptions(). 
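The three debugfs knobs registered above share one idiom: a pair of get/set callbacks wrapped by DEFINE_SIMPLE_ATTRIBUTE() and exposed with debugfs_create_file(). A minimal sketch of that idiom with invented names (a NULL parent places the file at the debugfs root rather than under powerpc_debugfs_root):

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>

static bool example_enabled;

static int example_set(void *data, u64 val)
{
	if (val != 0 && val != 1)
		return -EINVAL;
	example_enabled = !!val;   /* real code applies the state change here */
	return 0;
}

static int example_get(void *data, u64 *val)
{
	*val = example_enabled ? 1 : 0;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_example, example_get, example_set, "%llu\n");

static __init int example_debugfs_init(void)
{
	debugfs_create_file("example_knob", 0600, NULL, NULL, &fops_example);
	return 0;
}
device_initcall(example_debugfs_init);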
*/ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + unsigned long new_lpcr = lpcr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* P10 DD1 does not have HAIL */ + if (pvr_version_is(PVR_POWER10) && + (mfspr(SPRN_PVR) & 0xf00) == 0x100) + new_lpcr |= LPCR_AIL_3; + else + new_lpcr |= LPCR_HAIL; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + new_lpcr |= LPCR_AIL_3; + } + + if (new_lpcr != lpcr) + mtspr(SPRN_LPCR, new_lpcr); } /* @@ -941,266 +954,3 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); - -#ifdef CONFIG_PPC_BOOK3S_64 -static enum l1d_flush_type enabled_flush_types; -static void *l1d_flush_fallback_area; -static bool no_rfi_flush; -static bool no_entry_flush; -static bool no_uaccess_flush; -bool rfi_flush; -bool entry_flush; -bool uaccess_flush; -DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); -EXPORT_SYMBOL(uaccess_flush_key); - -static int __init handle_no_rfi_flush(char *p) -{ - pr_info("rfi-flush: disabled on command line."); - no_rfi_flush = true; - return 0; -} -early_param("no_rfi_flush", handle_no_rfi_flush); - -static int __init handle_no_entry_flush(char *p) -{ - pr_info("entry-flush: disabled on command line."); - no_entry_flush = true; - return 0; -} -early_param("no_entry_flush", handle_no_entry_flush); - -static int __init handle_no_uaccess_flush(char *p) -{ - pr_info("uaccess-flush: disabled on command line."); - no_uaccess_flush = true; - return 0; -} -early_param("no_uaccess_flush", handle_no_uaccess_flush); - -/* - * The RFI flush is not KPTI, but because users will see doco that says to use - * nopti we hijack that option here to also disable the RFI flush. - */ -static int __init handle_no_pti(char *p) -{ - pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); - handle_no_rfi_flush(NULL); - return 0; -} -early_param("nopti", handle_no_pti); - -static void do_nothing(void *unused) -{ - /* - * We don't need to do the flush explicitly, just enter+exit kernel is - * sufficient, the RFI exit handlers will do the right thing. - */ -} - -void rfi_flush_enable(bool enable) -{ - if (enable) { - do_rfi_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else - do_rfi_flush_fixups(L1D_FLUSH_NONE); - - rfi_flush = enable; -} - -static void entry_flush_enable(bool enable) -{ - if (enable) { - do_entry_flush_fixups(enabled_flush_types); - on_each_cpu(do_nothing, NULL, 1); - } else { - do_entry_flush_fixups(L1D_FLUSH_NONE); - } - - entry_flush = enable; -} - -static void uaccess_flush_enable(bool enable) -{ - if (enable) { - do_uaccess_flush_fixups(enabled_flush_types); - static_branch_enable(&uaccess_flush_key); - on_each_cpu(do_nothing, NULL, 1); - } else { - static_branch_disable(&uaccess_flush_key); - do_uaccess_flush_fixups(L1D_FLUSH_NONE); - } - - uaccess_flush = enable; -} - -static void __ref init_fallback_flush(void) -{ - u64 l1d_size, limit; - int cpu; - - /* Only allocate the fallback flush area once (at boot time). */ - if (l1d_flush_fallback_area) - return; - - l1d_size = ppc64_caches.l1d.size; - - /* - * If there is no d-cache-size property in the device tree, l1d_size - * could be zero. That leads to the loop in the asm wrapping around to - * 2^64-1, and then walking off the end of the fallback area and - * eventually causing a page fault which is fatal. Just default to - * something vaguely sane. 
- */ - if (!l1d_size) - l1d_size = (64 * 1024); - - limit = min(ppc64_bolted_size(), ppc64_rma_size); - - /* - * Align to L1d size, and size it at 2x L1d size, to catch possible - * hardware prefetch runoff. We don't have a recipe for load patterns to - * reliably avoid the prefetcher. - */ - l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, - l1d_size, MEMBLOCK_LOW_LIMIT, - limit, NUMA_NO_NODE); - if (!l1d_flush_fallback_area) - panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n", - __func__, l1d_size * 2, l1d_size, &limit); - - - for_each_possible_cpu(cpu) { - struct paca_struct *paca = paca_ptrs[cpu]; - paca->rfi_flush_fallback_area = l1d_flush_fallback_area; - paca->l1d_flush_size = l1d_size; - } -} - -void setup_rfi_flush(enum l1d_flush_type types, bool enable) -{ - if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: fallback displacement flush available\n"); - init_fallback_flush(); - } - - if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: ori type flush available\n"); - - if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: mttrig type flush available\n"); - - enabled_flush_types = types; - - if (!cpu_mitigations_off() && !no_rfi_flush) - rfi_flush_enable(enable); -} - -void setup_entry_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_entry_flush) - entry_flush_enable(enable); -} - -void setup_uaccess_flush(bool enable) -{ - if (cpu_mitigations_off()) - return; - - if (!no_uaccess_flush) - uaccess_flush_enable(enable); -} - -#ifdef CONFIG_DEBUG_FS -static int rfi_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != rfi_flush) - rfi_flush_enable(enable); - - return 0; -} - -static int rfi_flush_get(void *data, u64 *val) -{ - *val = rfi_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); - -static int entry_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != entry_flush) - entry_flush_enable(enable); - - return 0; -} - -static int entry_flush_get(void *data, u64 *val) -{ - *val = entry_flush ? 1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); - -static int uaccess_flush_set(void *data, u64 val) -{ - bool enable; - - if (val == 1) - enable = true; - else if (val == 0) - enable = false; - else - return -EINVAL; - - /* Only do anything if we're changing state */ - if (enable != uaccess_flush) - uaccess_flush_enable(enable); - - return 0; -} - -static int uaccess_flush_get(void *data, u64 *val) -{ - *val = uaccess_flush ? 
1 : 0; - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); - -static __init int rfi_flush_debugfs_init(void) -{ - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); - debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); - return 0; -} -device_initcall(rfi_flush_debugfs_init); -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 2559a681536e..f4aafa337c2e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -19,6 +19,15 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); +static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) +{ + BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64)); + + return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); +} +#define unsafe_get_user_sigset(dst, src, label) \ + unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) + #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); @@ -53,6 +62,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); &buf[i], label);\ } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1; i++) \ + unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM #define unsafe_copy_ckfpr_to_user(to, task, label) do { \ struct task_struct *__t = task; \ @@ -73,6 +102,26 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ &buf[i], label);\ } while (0) + +#define unsafe_copy_ckfpr_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], failed); \ +} while (0) + +#define unsafe_copy_ckvsx_from_user(task, from, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)from; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label); \ +} while (0) #endif #elif defined(CONFIG_PPC_FPU_REGS) @@ -80,6 +129,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ ELF_NFPREG * sizeof(double), label) +#define unsafe_copy_fpr_from_user(task, from, label) \ + unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \ + ELF_NFPREG * sizeof(double), label) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { @@ 
-115,6 +168,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) #else #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) + static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index f651b992fe01..8f05ed0da292 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -83,24 +83,17 @@ * implementation that makes things simple for little endian only) */ #define unsafe_put_sigset_t unsafe_put_compat_sigset - -static inline int get_sigset_t(sigset_t *set, - const compat_sigset_t __user *uset) -{ - return get_compat_sigset(set, uset); -} +#define unsafe_get_sigset_t unsafe_get_compat_sigset #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; - WARN_ON(!FULL_REGS(regs)); - for (i = 0; i <= PT_RESULT; i ++) { /* Force usr to alway see softe as 1 (interrupts enabled) */ if (i == PT_SOFTE) @@ -116,8 +109,8 @@ save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - struct mcontext __user *sr) +static __always_inline int +__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; @@ -125,10 +118,12 @@ static inline int restore_general_regs(struct pt_regs *regs, for (i = 0; i <= PT_RESULT; i++) { if ((i == PT_MSR) || (i == PT_SOFTE)) continue; - if (__get_user(gregs[i], &sr->mc_gregs[i])) - return -EFAULT; + unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed); } return 0; + +failed: + return 1; } #else /* CONFIG_PPC64 */ @@ -142,18 +137,14 @@ static inline int restore_general_regs(struct pt_regs *regs, unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ } while (0) -static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) -{ - return copy_from_user(set, uset, sizeof(*uset)); -} +#define unsafe_get_sigset_t unsafe_get_user_sigset #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) static __always_inline int -save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) +__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { - WARN_ON(!FULL_REGS(regs)); unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); return 0; @@ -161,23 +152,30 @@ save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) return 1; } -static inline int restore_general_regs(struct pt_regs *regs, - struct mcontext __user *sr) +static __always_inline +int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr) { /* copy up to but not including MSR */ - if (__copy_from_user(regs, &sr->mc_gregs, - PT_MSR * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed); + /* copy from orig_r3 (the word after the MSR) up to the end */ - if (__copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], - GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t))) - return -EFAULT; + unsafe_copy_from_user(®s->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3], + GP_REGS_SIZE 
- PT_ORIG_R3 * sizeof(elf_greg_t), failed); + return 0; + +failed: + return 1; } #endif #define unsafe_save_general_regs(regs, frame, label) do { \ - if (save_general_regs_unsafe(regs, frame)) \ + if (__unsafe_save_general_regs(regs, frame)) \ + goto label; \ +} while (0) + +#define unsafe_restore_general_regs(regs, frame, label) do { \ + if (__unsafe_restore_general_regs(regs, frame)) \ goto label; \ } while (0) @@ -260,8 +258,8 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -338,7 +336,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f } #define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ - if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx)) \ goto label; \ } while (0) @@ -350,7 +348,7 @@ static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *f * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). + * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts(). */ static void prepare_save_tm_user_regs(void) { @@ -441,7 +439,7 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs_unsafe(). + * simply the same as in __unsafe_save_user_regs(). */ if (current->thread.used_spe) { unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, @@ -485,26 +483,25 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user static long restore_user_regs(struct pt_regs *regs, struct mcontext __user *sr, int sig) { - long err; unsigned int save_r2 = 0; unsigned long msr; #ifdef CONFIG_VSX int i; #endif + if (!user_read_access_begin(sr, sizeof(*sr))) + return 1; /* * restore general registers but not including MSR or SOFTE. 
Also * take care of keeping r2 (TLS) intact if not a signal */ if (!sig) save_r2 = (unsigned int)regs->gpr[2]; - err = restore_general_regs(regs, sr); + unsafe_restore_general_regs(regs, sr, failed); set_trap_norestart(regs); - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); if (!sig) regs->gpr[2] = (unsigned long) save_r2; - if (err) - return 1; /* if doing signal return, restore the previous little-endian mode */ if (sig) @@ -518,22 +515,19 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.vrsave); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_from_user(current, &sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX /* @@ -546,8 +540,7 @@ static long restore_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) @@ -565,19 +558,22 @@ static long restore_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { /* restore spe registers from the stack */ - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ + user_read_access_end(); return 0; + +failed: + user_read_access_end(); + return 1; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -590,7 +586,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, struct mcontext __user *tm_sr) { - long err; unsigned long msr, msr_hi; #ifdef CONFIG_VSX int i; @@ -605,15 +600,13 @@ static long restore_tm_user_regs(struct pt_regs *regs, * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR * were set by the signal delivery. 
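The restore_user_regs() conversion above follows the open-coded user-access pattern: one user_read_access_begin() window, unsafe_*() accessors that branch to a label on fault, and user_read_access_end() on both exit paths. A minimal sketch with an invented structure:

#include <linux/uaccess.h>
#include <linux/types.h>

struct example_frame {
	u32 a;
	u32 b;
};

static int read_example_frame(const struct example_frame __user *uf,
			      struct example_frame *kf)
{
	if (!user_read_access_begin(uf, sizeof(*uf)))
		return -EFAULT;

	/* No early returns between begin and end; faults jump to the label. */
	unsafe_get_user(kf->a, &uf->a, efault);
	unsafe_get_user(kf->b, &uf->b, efault);

	user_read_access_end();
	return 0;

efault:
	user_read_access_end();
	return -EFAULT;
}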
*/ - err = restore_general_regs(regs, tm_sr); - err |= restore_general_regs(¤t->thread.ckpt_regs, sr); - - err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]); - - err |= __get_user(msr, &sr->mc_gregs[PT_MSR]); - if (err) + if (!user_read_access_begin(sr, sizeof(*sr))) return 1; + unsafe_restore_general_regs(¤t->thread.ckpt_regs, sr, failed); + unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed); + unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); + /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); @@ -621,12 +614,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, - sizeof(sr->mc_vregs)) || - __copy_from_user(¤t->thread.vr_state, - &tm_sr->mc_vregs, - sizeof(sr->mc_vregs))) - return 1; + unsafe_copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, + sizeof(sr->mc_vregs), failed); current->thread.used_vr = true; } else if (current->thread.used_vr) { memset(¤t->thread.vr_state, 0, @@ -636,20 +625,15 @@ static long restore_tm_user_regs(struct pt_regs *regs, } /* Always get VRSAVE back */ - if (__get_user(current->thread.ckvrsave, - (u32 __user *)&sr->mc_vregs[32]) || - __get_user(current->thread.vrsave, - (u32 __user *)&tm_sr->mc_vregs[32])) - return 1; + unsafe_get_user(current->thread.ckvrsave, + (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); - if (copy_fpr_from_user(current, &sr->mc_fregs) || - copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) - return 1; + unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); #ifdef CONFIG_VSX regs->msr &= ~MSR_VSX; @@ -658,9 +642,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) || - copy_ckvsx_from_user(current, &sr->mc_vsregs)) - return 1; + unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed); current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { @@ -675,23 +657,54 @@ static long restore_tm_user_regs(struct pt_regs *regs, */ regs->msr &= ~MSR_SPE; if (msr & MSR_SPE) { - if (__copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, + ELF_NEVRREG * sizeof(u32), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); /* Always get SPEFSCR back */ - if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs - + ELF_NEVRREG)) - return 1; + unsafe_get_user(current->thread.spefscr, + (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - /* Get the top half of the MSR from the user context */ - if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR])) + user_read_access_end(); + + if (!user_read_access_begin(tm_sr, sizeof(*tm_sr))) return 1; + + unsafe_restore_general_regs(regs, tm_sr, failed); + +#ifdef CONFIG_ALTIVEC + /* restore altivec registers from the stack */ + if (msr & MSR_VEC) + unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, + sizeof(sr->mc_vregs), failed); + + /* Always get VRSAVE back */ + 
unsafe_get_user(current->thread.vrsave, + (u32 __user *)&tm_sr->mc_vregs[32], failed); +#endif /* CONFIG_ALTIVEC */ + + unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); + +#ifdef CONFIG_VSX + if (msr & MSR_VSX) { + /* + * Restore altivec registers from the stack to a local + * buffer, then write this out to the thread_struct + */ + unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); + current->thread.used_vsr = true; + } +#endif /* CONFIG_VSX */ + + /* Get the top half of the MSR from the user context */ + unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); msr_hi <<= 32; + + user_read_access_end(); + /* If TM bits are set to the reserved value, it's an invalid context */ if (MSR_TM_RESV(msr_hi)) return 1; @@ -739,6 +752,16 @@ static long restore_tm_user_regs(struct pt_regs *regs, preempt_enable(); return 0; + +failed: + user_read_access_end(); + return 1; +} +#else +static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr, + struct mcontext __user *tm_sr) +{ + return 0; } #endif @@ -944,28 +967,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int sigset_t set; struct mcontext __user *mcp; - if (get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; + + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); #ifdef CONFIG_PPC64 { u32 cmcp; - if (__get_user(cmcp, &ucp->uc_regs)) - return -EFAULT; + unsafe_get_user(cmcp, &ucp->uc_regs, failed); mcp = (struct mcontext __user *)(u64)cmcp; - /* no need to check access_ok(mcp), since mcp < 4GB */ } #else - if (__get_user(mcp, &ucp->uc_regs)) - return -EFAULT; - if (!access_ok(mcp, sizeof(*mcp))) - return -EFAULT; + unsafe_get_user(mcp, &ucp->uc_regs, failed); #endif + user_read_access_end(); + set_current_blocked(&set); if (restore_user_regs(regs, mcp, sig)) return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -979,11 +1005,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp, u32 cmcp; u32 tm_cmcp; - if (get_sigset_t(&set, &ucp->uc_sigmask)) + if (!user_read_access_begin(ucp, sizeof(*ucp))) return -EFAULT; - if (__get_user(cmcp, &ucp->uc_regs) || - __get_user(tm_cmcp, &tm_ucp->uc_regs)) + unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed); + unsafe_get_user(cmcp, &ucp->uc_regs, failed); + + user_read_access_end(); + + if (__get_user(tm_cmcp, &tm_ucp->uc_regs)) return -EFAULT; mcp = (struct mcontext __user *)(u64)cmcp; tm_mcp = (struct mcontext __user *)(u64)tm_cmcp; @@ -994,6 +1024,10 @@ static int do_setcontext_tm(struct ucontext __user *ucp, return -EFAULT; return 0; + +failed: + user_read_access_end(); + return -EFAULT; } #endif @@ -1311,19 +1345,16 @@ SYSCALL_DEFINE0(sigreturn) struct sigcontext __user *sc; struct sigcontext sigctx; struct mcontext __user *sr; - void __user *addr; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - struct mcontext __user *mcp, *tm_mcp; - unsigned long msr_hi; -#endif + struct mcontext __user *mcp; + struct mcontext __user *tm_mcp = NULL; + unsigned long long msr_hi = 0; /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); sc = &sf->sctx; - addr = sc; if (copy_from_user(&sigctx, sc, sizeof(sigctx))) goto badframe; @@ -1339,31 +1370,32 @@ SYSCALL_DEFINE0(sigreturn) #endif set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM mcp = (struct mcontext __user 
*)&sf->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mcp = (struct mcontext __user *)&sf->mctx_transact; if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR])) goto badframe; +#endif if (MSR_TM_ACTIVE(msr_hi<<32)) { if (!cpu_has_feature(CPU_FTR_TM)) goto badframe; if (restore_tm_user_regs(regs, mcp, tm_mcp)) goto badframe; - } else -#endif - { + } else { sr = (struct mcontext __user *)from_user_ptr(sigctx.regs); - addr = sr; - if (!access_ok(sr, sizeof(*sr)) - || restore_user_regs(regs, sr, 1)) - goto badframe; + if (restore_user_regs(regs, sr, 1)) { + signal_fault(current, regs, "sys_sigreturn", sr); + + force_sig(SIGSEGV); + return 0; + } } set_thread_flag(TIF_RESTOREALL); return 0; badframe: - signal_fault(current, regs, "sys_sigreturn", addr); + signal_fault(current, regs, "sys_sigreturn", sc); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f9e4a1ac440f..dca66481d0c2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -79,13 +79,36 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc) } #endif +static void prepare_setup_sigcontext(struct task_struct *tsk) +{ +#ifdef CONFIG_ALTIVEC + /* save altivec registers */ + if (tsk->thread.used_vr) + flush_altivec_to_thread(tsk); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + tsk->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif /* CONFIG_ALTIVEC */ + + flush_fp_to_thread(tsk); + +#ifdef CONFIG_VSX + if (tsk->thread.used_vsr) + flush_vsx_to_thread(tsk); +#endif /* CONFIG_VSX */ +} + /* * Set up the sigcontext for the signal frame. */ -static long setup_sigcontext(struct sigcontext __user *sc, - struct task_struct *tsk, int signr, sigset_t *set, - unsigned long handler, int ctx_has_vsx_region) +#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\ +do { \ + if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\ + goto label; \ +} while (0) +static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc, + struct task_struct *tsk, int signr, sigset_t *set, + unsigned long handler, int ctx_has_vsx_region) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -97,25 +120,22 @@ static long setup_sigcontext(struct sigcontext __user *sc, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); - unsigned long vrsave; #endif struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; - long err = 0; /* Force usr to alway see softe as 1 (interrupts enabled) */ unsigned long softe = 0x1; BUG_ON(tsk != current); #ifdef CONFIG_ALTIVEC - err |= __put_user(v_regs, &sc->v_regs); + unsafe_put_user(v_regs, &sc->v_regs, efault_out); /* save altivec registers */ if (tsk->thread.used_vr) { - flush_altivec_to_thread(tsk); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, &tsk->thread.vr_state, - 33 * sizeof(vector128)); + unsafe_copy_to_user(v_regs, &tsk->thread.vr_state, + 33 * sizeof(vector128), efault_out); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. */ @@ -124,19 +144,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. 
*/ - vrsave = 0; - if (cpu_has_feature(CPU_FTR_ALTIVEC)) { - vrsave = mfspr(SPRN_VRSAVE); - tsk->thread.vrsave = vrsave; - } - - err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); + unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); #else /* CONFIG_ALTIVEC */ - err |= __put_user(0, &sc->v_regs); + unsafe_put_user(0, &sc->v_regs, efault_out); #endif /* CONFIG_ALTIVEC */ - flush_fp_to_thread(tsk); /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, tsk); + unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -150,26 +163,27 @@ static long setup_sigcontext(struct sigcontext __user *sc, * VMX data. */ if (tsk->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(tsk); v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, tsk); + unsafe_copy_vsx_to_user(v_regs, tsk, efault_out); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. */ msr |= MSR_VSX; } #endif /* CONFIG_VSX */ - err |= __put_user(&sc->gp_regs, &sc->regs); - WARN_ON(!FULL_REGS(regs)); - err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); - err |= __put_user(msr, &sc->gp_regs[PT_MSR]); - err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); - err |= __put_user(signr, &sc->signal); - err |= __put_user(handler, &sc->handler); + unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out); + unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out); + unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out); + unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out); + unsafe_put_user(signr, &sc->signal, efault_out); + unsafe_put_user(handler, &sc->handler, efault_out); if (set != NULL) - err |= __put_user(set->sig[0], &sc->oldmask); + unsafe_put_user(set->sig[0], &sc->oldmask, efault_out); - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -294,7 +308,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, err |= __put_user(&sc->gp_regs, &sc->regs); err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs); - WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->gp_regs, &tsk->thread.ckpt_regs, GP_REGS_SIZE); @@ -312,14 +325,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, /* * Restore the sigcontext from the signal frame. 
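The unsafe_setup_sigcontext()/unsafe_restore_sigcontext() wrappers above use one shape: the helper returns nonzero on fault, and a do-while(0) macro converts that into a goto so call sites read like any other unsafe_*() accessor. A sketch of the shape with invented names (the caller is assumed to have already opened the user-access window):

#include <linux/uaccess.h>
#include <linux/types.h>

static int __unsafe_copy_example(u32 __user *dst, u32 val)
{
	unsafe_put_user(val, dst, efault_out);
	return 0;

efault_out:
	return -EFAULT;
}

#define unsafe_copy_example(dst, val, label) do {	\
	if (__unsafe_copy_example(dst, val))		\
		goto label;				\
} while (0)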
*/ - -static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, - struct sigcontext __user *sc) +#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do { \ + if (__unsafe_restore_sigcontext(tsk, set, sig, sc)) \ + goto label; \ +} while (0) +static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set, + int sig, struct sigcontext __user *sc) { #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs; #endif - unsigned long err = 0; unsigned long save_r13 = 0; unsigned long msr; struct pt_regs *regs = tsk->thread.regs; @@ -334,27 +349,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, save_r13 = regs->gpr[13]; /* copy the GPRs */ - err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr)); - err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]); + unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out); + unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out); /* get MSR separately, transfer the LE bit if doing signal return */ - err |= __get_user(msr, &sc->gp_regs[PT_MSR]); + unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]); - err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]); - err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]); - err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]); - err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]); + unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); + unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); + unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); + unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out); + unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out); /* Don't allow userspace to set SOFTE */ set_trap_norestart(regs); - err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); - err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); - err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); + unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out); + unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out); + unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out); if (!sig) regs->gpr[13] = save_r13; if (set != NULL) - err |= __get_user(set->sig[0], &sc->oldmask); + unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* * Force reload of FP/VEC. 
@@ -364,29 +379,27 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); #ifdef CONFIG_ALTIVEC - err |= __get_user(v_regs, &sc->v_regs); - if (err) - return err; + unsafe_get_user(v_regs, &sc->v_regs, efault_out); if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(&tsk->thread.vr_state, v_regs, - 33 * sizeof(vector128)); + unsafe_copy_from_user(&tsk->thread.vr_state, v_regs, + 33 * sizeof(vector128), efault_out); tsk->thread.used_vr = true; } else if (tsk->thread.used_vr) { memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL) - err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); + unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out); else tsk->thread.vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(tsk, &sc->fp_regs); + unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -395,14 +408,17 @@ static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, */ v_regs += ELF_NVRREG; if ((msr & MSR_VSX) != 0) { - err |= copy_vsx_from_user(tsk, v_regs); + unsafe_copy_vsx_from_user(tsk, v_regs, efault_out); tsk->thread.used_vsr = true; } else { for (i = 0; i < 32 ; i++) tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif - return err; + return 0; + +efault_out: + return -EFAULT; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -586,6 +602,12 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return err; } +#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ +static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc, + struct sigcontext __user *tm_sc) +{ + return -EINVAL; +} #endif /* @@ -655,12 +677,16 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, ctx_has_vsx_region = 1; if (old_ctx != NULL) { - if (!access_ok(old_ctx, ctx_size) - || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, - ctx_has_vsx_region) - || __copy_to_user(&old_ctx->uc_sigmask, - ¤t->blocked, sizeof(sigset_t))) + prepare_setup_sigcontext(current); + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + + unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, + 0, ctx_has_vsx_region, efault_out); + unsafe_copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked, + sizeof(sigset_t), efault_out); + + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -680,15 +706,25 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. 
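
The restore_tm_sigcontexts() stub added above for !CONFIG_PPC_TRANSACTIONAL_MEM is what lets the sigreturn path further down replace #ifdef blocks with IS_ENABLED() checks: both branches must still parse and type-check, even though the dead one is folded away at compile time. A hedged sketch of the idiom, with made-up names:

        #ifdef CONFIG_EXAMPLE_TM
        long example_tm_restore(struct task_struct *tsk);
        #else
        static long example_tm_restore(struct task_struct *tsk)
        {
                return -EINVAL; /* compiled out by the IS_ENABLED() check at the call site */
        }
        #endif

        static long example_restore(struct task_struct *tsk, bool tm_active)
        {
                if (IS_ENABLED(CONFIG_EXAMPLE_TM) && tm_active)
                        return example_tm_restore(tsk);
                return 0;
        }
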
*/ - if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) do_exit(SIGSEGV); set_current_blocked(&set); - if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) + + if (!user_read_access_begin(new_ctx, ctx_size)) + return -EFAULT; + if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { + user_read_access_end(); do_exit(SIGSEGV); + } + user_read_access_end(); /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); return 0; + +efault_out: + user_write_access_end(); + return -EFAULT; } @@ -701,9 +737,7 @@ SYSCALL_DEFINE0(rt_sigreturn) struct pt_regs *regs = current_pt_regs(); struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM unsigned long msr; -#endif /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -711,52 +745,54 @@ SYSCALL_DEFINE0(rt_sigreturn) if (!access_ok(uc, sizeof(*uc))) goto badframe; - if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + if (__get_user_sigset(&set, &uc->uc_sigmask)) goto badframe; set_current_blocked(&set); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* - * If there is a transactional state then throw it away. - * The purpose of a sigreturn is to destroy all traces of the - * signal frame, this includes any transactional state created - * within in. We only check for suspended as we can never be - * active in the kernel, we are active, there is nothing better to - * do than go ahead and Bad Thing later. - * The cause is not important as there will never be a - * recheckpoint so it's not user visible. - */ - if (MSR_TM_SUSPENDED(mfmsr())) - tm_reclaim_current(0); + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) { + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); - /* - * Disable MSR[TS] bit also, so, if there is an exception in the - * code below (as a page fault in copy_ckvsx_to_user()), it does - * not recheckpoint this task if there was a context switch inside - * the exception. - * - * A major page fault can indirectly call schedule(). A reschedule - * process in the middle of an exception can have a side effect - * (Changing the CPU MSR[TS] state), since schedule() is called - * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended - * (switch_to() calls tm_recheckpoint() for the 'new' process). In - * this case, the process continues to be the same in the CPU, but - * the CPU state just changed. - * - * This can cause a TM Bad Thing, since the MSR in the stack will - * have the MSR[TS]=0, and this is what will be used to RFID. - * - * Clearing MSR[TS] state here will avoid a recheckpoint if there - * is any process reschedule in kernel space. The MSR[TS] state - * does not need to be saved also, since it will be replaced with - * the MSR[TS] that came from user context later, at - * restore_tm_sigcontexts. 
- */ - regs->msr &= ~MSR_TS_MASK; + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. + */ + regs->msr &= ~MSR_TS_MASK; - if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) - goto badframe; - if (MSR_TM_ACTIVE(msr)) { + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) + goto badframe; + } + + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) { /* We recheckpoint on return. */ struct ucontext __user *uc_transact; @@ -769,9 +805,7 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; - } else -#endif - { + } else { /* * Fall through, for non-TM restore * @@ -785,8 +819,13 @@ SYSCALL_DEFINE0(rt_sigreturn) * causing a TM bad thing. */ current->thread.regs->msr &= ~MSR_TS_MASK; - if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) + if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) goto badframe; + + unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext, + badframe_block); + + user_read_access_end(); } if (restore_altstack(&uc->uc_stack)) @@ -795,6 +834,8 @@ SYSCALL_DEFINE0(rt_sigreturn) set_thread_flag(TIF_RESTOREALL); return 0; +badframe_block: + user_read_access_end(); badframe: signal_fault(current, regs, "rt_sigreturn", uc); @@ -809,46 +850,57 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); - if (!access_ok(frame, sizeof(*frame))) + + /* + * This only applies when calling unsafe_setup_sigcontext() and must be + * called before opening the uaccess window. + */ + if (!MSR_TM_ACTIVE(msr)) + prepare_setup_sigcontext(tsk); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, &ksig->info); - if (err) - goto badframe; + unsafe_put_user(&frame->info, &frame->pinfo, badframe_block); + unsafe_put_user(&frame->uc, &frame->puc, badframe_block); /* Create the ucontext. 
*/ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user(0, &frame->uc.uc_flags, badframe_block); + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block); + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ - err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); + unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block); + + user_write_access_end(); + err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, msr); - } else + + if (!user_write_access_begin(&frame->uc.uc_sigmask, + sizeof(frame->uc.uc_sigmask))) + goto badframe; + #endif - { - err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, + } else { + unsafe_put_user(0, &frame->uc.uc_link, badframe_block); + unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, - 1); + 1, badframe_block); } - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - goto badframe; + + unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); + user_write_access_end(); /* Make sure signal handler doesn't get spurious FP exceptions */ tsk->thread.fp_state.fpscr = 0; @@ -863,6 +915,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, regs->nip = (unsigned long) &frame->tramp[0]; } + + /* Save the siginfo outside of the unsafe block. */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + /* Allocate a dummy caller frame for the signal handler. */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); @@ -902,6 +959,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; +badframe_block: + user_write_access_end(); badframe: signal_fault(current, regs, "handle_rt_signal64", frame); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5a4d59a1070d..2e05c783440a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -83,7 +83,7 @@ DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); -DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); +static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* * On some big-cores system, thread_group_l2_cache_map for each CPU * corresponds to the set its siblings within the core that share the * L2-cache. 
*/ -DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -1057,17 +1057,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_memory_node(numa_cpu_lookup_table[cpu])); } #endif - /* - * cpu_core_map is now more updated and exists only since - * its been exported for long. It only will have a snapshot - * of cpu_cpu_mask. - */ - cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); } /* Init the cpumasks so the boot CPU is related to itself */ cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); if (has_coregroup_support()) cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); @@ -1078,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smallcore_mask(boot_cpuid)); } + if (cpu_to_chip_id(boot_cpuid) != -1) { + int idx = num_possible_cpus() / threads_per_core; + + /* + * All threads of a core will all belong to the same core, + * chip_id_lookup_table will have one entry per core. + * Assumption: if boot_cpuid doesn't have a chip-id, then no + * other CPUs, will also not have chip-id. + */ + chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL); + if (chip_id_lookup_table) + memset(chip_id_lookup_table, -1, sizeof(int) * idx); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -1408,6 +1417,9 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); } + for_each_cpu(i, cpu_core_mask(cpu)) + set_cpus_unrelated(cpu, i, cpu_core_mask); + if (has_coregroup_support()) { for_each_cpu(i, cpu_coregroup_mask(cpu)) set_cpus_unrelated(cpu, i, cpu_coregroup_mask); @@ -1468,8 +1480,11 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask) static void add_cpu_to_masks(int cpu) { + struct cpumask *(*submask_fn)(int) = cpu_sibling_mask; int first_thread = cpu_first_thread_sibling(cpu); cpumask_var_t mask; + int chip_id = -1; + bool ret; int i; /* @@ -1485,12 +1500,39 @@ static void add_cpu_to_masks(int cpu) add_cpu_to_smallcore_masks(cpu); /* In CPU-hotplug path, hence use GFP_ATOMIC */ - alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); + ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu)); update_mask_by_l2(cpu, &mask); if (has_coregroup_support()) update_coregroup_mask(cpu, &mask); + if (chip_id_lookup_table && ret) + chip_id = cpu_to_chip_id(cpu); + + if (chip_id == -1) { + cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); + goto out; + } + + if (shared_caches) + submask_fn = cpu_l2_cache_mask; + + /* Update core_mask with all the CPUs that are part of submask */ + or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask); + + /* Skip all CPUs already part of current CPU core mask */ + cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); + + for_each_cpu(i, mask) { + if (chip_id == cpu_to_chip_id(i)) { + or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask); + cpumask_andnot(mask, mask, submask_fn(i)); + } else { + cpumask_andnot(mask, mask, cpu_core_mask(i)); + } + } + +out: free_cpumask_var(mask); } @@ -1521,6 +1563,9 @@ void start_secondary(void *unused) vdso_getcpu_init(); #endif + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + /* Update topology CPU masks */ add_cpu_to_masks(cpu); @@ -1539,9 +1584,6 @@ void start_secondary(void *unused) 
shared_caches = true; } - set_numa_node(numa_cpu_lookup_table[cpu]); - set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); - smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b6440657ef92..1deb1bf331dd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,90 +23,56 @@ #include -/* - * Save stack-backtrace addresses into a stack_trace buffer. - */ -static void save_context_stack(struct stack_trace *trace, unsigned long sp, - struct task_struct *tsk, int savesched) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { + unsigned long sp; + + if (regs && !consume_entry(cookie, regs->nip)) + return; + + if (regs) + sp = regs->gpr[1]; + else if (task == current) + sp = current_stack_frame(); + else + sp = task->thread.ksp; + for (;;) { unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) return; newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; - if (savesched || !in_sched_functions(ip)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; - } - - if (trace->nr_entries >= trace->max_entries) + if (!consume_entry(cookie, ip)) return; sp = newsp; } } -void save_stack_trace(struct stack_trace *trace) -{ - unsigned long sp; - - sp = current_stack_frame(); - - save_context_stack(trace, sp, current, 1); -} -EXPORT_SYMBOL_GPL(save_stack_trace); - -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) -{ - unsigned long sp; - - if (!try_get_task_stack(tsk)) - return; - - if (tsk == current) - sp = current_stack_frame(); - else - sp = tsk->thread.ksp; - - save_context_stack(trace, sp, tsk, 0); - - put_task_stack(tsk); -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); - -void -save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - save_context_stack(trace, regs->gpr[1], current, 0); -} -EXPORT_SYMBOL_GPL(save_stack_trace_regs); - -#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE /* * This function returns an error if it detects any unreliable features of the * stack. Otherwise it guarantees that the stack trace is reliable. * * If the task is not 'current', the caller *must* ensure the task is inactive. 
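
With the arch_stack_walk() conversion above, the generic stacktrace code drives the walk and the architecture only feeds return addresses to a callback; returning false from the callback stops the walk. A rough sketch of a consumer, with invented names:

        struct example_trace {
                unsigned long entries[16];
                unsigned int nr;
        };

        static bool example_consume(void *cookie, unsigned long addr)
        {
                struct example_trace *t = cookie;

                t->entries[t->nr++] = addr;
                return t->nr < ARRAY_SIZE(t->entries);  /* false terminates the walk */
        }

        static void example_dump_current(void)
        {
                struct example_trace t = { };
                unsigned int i;

                arch_stack_walk(example_consume, &t, current, NULL);

                for (i = 0; i < t.nr; i++)
                        pr_info("  %pS\n", (void *)t.entries[i]);
        }

In practice callers go through stack_trace_save()/stack_trace_save_tsk(), which supply a similar callback internally.
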
*/ -static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; - unsigned long stack_page = (unsigned long)task_stack_page(tsk); + unsigned long stack_page = (unsigned long)task_stack_page(task); unsigned long stack_end; int graph_idx = 0; bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(tsk)) { + if (!is_idle_task(task)) { /* * For user tasks, this is the SP value loaded on * kernel entry, see "PACAKSAVE(r13)" in _switch() and @@ -130,10 +96,10 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, stack_end -= STACK_FRAME_OVERHEAD; } - if (tsk == current) + if (task == current) sp = current_stack_frame(); else - sp = tsk->thread.ksp; + sp = task->thread.ksp; if (sp < stack_page + sizeof(struct thread_struct) || sp > stack_end - STACK_FRAME_MIN_SIZE) { @@ -182,7 +148,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, * FIXME: IMHO these tests do not belong in * arch-dependent code, they are generic. */ - ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack); + ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); #ifdef CONFIG_KPROBES /* * Mark stacktraces with kretprobed functions on them @@ -192,36 +158,12 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, return -EINVAL; #endif - if (trace->nr_entries >= trace->max_entries) - return -E2BIG; - if (!trace->skip) - trace->entries[trace->nr_entries++] = ip; - else - trace->skip--; + if (!consume_entry(cookie, ip)) + return -EINVAL; } return 0; } -int save_stack_trace_tsk_reliable(struct task_struct *tsk, - struct stack_trace *trace) -{ - int ret; - - /* - * If the task doesn't have a stack (e.g., a zombie), the stack is - * "reliably" empty. 
- */ - if (!try_get_task_stack(tsk)) - return 0; - - ret = __save_stack_trace_tsk_reliable(tsk, trace); - - put_task_stack(tsk); - - return ret; -} -#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ - #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) static void handle_backtrace_ipi(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 078608ec2e92..a552c9e68d7e 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -82,16 +82,8 @@ int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) { if ( (unsigned long)n >= 4096 ) - { - unsigned long __user *buffer = (unsigned long __user *)n; - if (!access_ok(buffer, 5*sizeof(unsigned long)) - || __get_user(n, buffer) - || __get_user(inp, ((fd_set __user * __user *)(buffer+1))) - || __get_user(outp, ((fd_set __user * __user *)(buffer+2))) - || __get_user(exp, ((fd_set __user * __user *)(buffer+3))) - || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4)))) - return -EFAULT; - } + return sys_old_select((void __user *)n); + return sys_select(n, inp, outp, exp, tvp); } #endif diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index 9e3be295dbba..5476f62eb80f 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -6,53 +6,38 @@ _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') syscall := $(src)/syscall.tbl -syshdr := $(srctree)/$(src)/syscallhdr.sh -systbl := $(srctree)/$(src)/syscalltbl.sh +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh quiet_cmd_syshdr = SYSHDR $@ - cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \ - '$(syshdr_abis_$(basetarget))' \ - '$(syshdr_pfx_$(basetarget))' \ - '$(syshdr_offset_$(basetarget))' + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@ quiet_cmd_systbl = SYSTBL $@ - cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \ - '$(systbl_abis_$(basetarget))' \ - '$(systbl_abi_$(basetarget))' \ - '$(systbl_offset_$(basetarget))' + cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@ -syshdr_abis_unistd_32 := common,nospu,32 +$(uapi)/unistd_32.h: abis := common,nospu,32 $(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -syshdr_abis_unistd_64 := common,nospu,64 +$(uapi)/unistd_64.h: abis := common,nospu,64 $(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE $(call if_changed,syshdr) -systbl_abis_syscall_table_32 := common,nospu,32 -systbl_abi_syscall_table_32 := 32 +$(kapi)/syscall_table_32.h: abis := common,nospu,32 $(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_64 := common,nospu,64 -systbl_abi_syscall_table_64 := 64 +$(kapi)/syscall_table_64.h: abis := common,nospu,64 $(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) -systbl_abis_syscall_table_c32 := common,nospu,32 -systbl_abi_syscall_table_c32 := c32 -$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE - $(call if_changed,systbl) - -systbl_abis_syscall_table_spu := common,spu -systbl_abi_syscall_table_spu := spu +$(kapi)/syscall_table_spu.h: abis := common,spu $(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE $(call if_changed,systbl) uapisyshdr-y += unistd_32.h unistd_64.h kapisyshdr-y += syscall_table_32.h \ syscall_table_64.h \ - syscall_table_c32.h \ 
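
The open-coded argument fetch removed from ppc_select() above is the historical "old select" calling convention, where userspace passes a pointer to a block of five words and sys_old_select() already decodes exactly that. For reference only (a sketch mirroring the structure in fs/select.c, not something this patch adds):

        struct sel_arg_struct {
                unsigned long n;
                fd_set __user *inp, *outp, *exp;
                struct __kernel_old_timeval __user *tvp;
        };

so forwarding to sys_old_select((void __user *)n) keeps the old ABI while dropping the duplicated __get_user() sequence.
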
syscall_table_spu.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh deleted file mode 100644 index 02d6751f3be3..000000000000 --- a/arch/powerpc/kernel/syscalls/syscallhdr.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -prefix="$4" -offset="$5" - -fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \ - -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'` -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - printf "#ifndef %s\n" "${fileguard}" - printf "#define %s\n" "${fileguard}" - printf "\n" - - nxt=0 - while read nr abi name entry compat ; do - if [ -z "$offset" ]; then - printf "#define __NR_%s%s\t%s\n" \ - "${prefix}" "${name}" "${nr}" - else - printf "#define __NR_%s%s\t(%s + %s)\n" \ - "${prefix}" "${name}" "${offset}" "${nr}" - fi - nxt=$((nr+1)) - done - - printf "\n" - printf "#ifdef __KERNEL__\n" - printf "#define __NR_syscalls\t%s\n" "${nxt}" - printf "#endif\n" - printf "\n" - printf "#endif /* %s */\n" "${fileguard}" -) > "$out" diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh deleted file mode 100644 index f7393a7b18aa..000000000000 --- a/arch/powerpc/kernel/syscalls/syscalltbl.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -in="$1" -out="$2" -my_abis=`echo "($3)" | tr ',' '|'` -my_abi="$4" -offset="$5" - -emit() { - t_nxt="$1" - t_nr="$2" - t_entry="$3" - - while [ $t_nxt -lt $t_nr ]; do - printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}" - t_nxt=$((t_nxt+1)) - done - printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}" -} - -grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | ( - nxt=0 - if [ -z "$offset" ]; then - offset=0 - fi - - while read nr abi name entry compat ; do - if [ "$my_abi" = "c32" ] && [ ! 
-z "$compat" ]; then - emit $((nxt+offset)) $((nr+offset)) $compat - else - emit $((nxt+offset)) $((nr+offset)) $entry - fi - nxt=$((nr+1)) - done -) > "$out" diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index d34276f3c495..cb3358886203 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -21,6 +21,7 @@ #define __SYSCALL(nr, entry) .long entry #endif +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) .globl sys_call_table sys_call_table: #ifdef CONFIG_PPC64 @@ -30,8 +31,10 @@ sys_call_table: #endif #ifdef CONFIG_COMPAT +#undef __SYSCALL_WITH_COMPAT +#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) .globl compat_sys_call_table compat_sys_call_table: #define compat_sys_sigsuspend sys_sigsuspend -#include +#include #endif diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 42761ebec9f7..ffe9537195aa 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -68,7 +68,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ /* read the text we want to modify */ - if (probe_kernel_read_inst(&replaced, (void *)ip)) + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -130,7 +130,7 @@ __ftrace_make_nop(struct module *mod, struct ppc_inst op, pop; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -164,7 +164,7 @@ __ftrace_make_nop(struct module *mod, /* When using -mkernel_profile there is no load to jump over */ pop = ppc_inst(PPC_INST_NOP); - if (probe_kernel_read_inst(&op, (void *)(ip - 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; } @@ -197,7 +197,7 @@ __ftrace_make_nop(struct module *mod, * Check what is in the next instruction. We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. 
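
The generated syscall_table_*.h headers expand to one macro invocation per table entry, and entries that have a compat implementation use the three-argument form, which is why systbl.S above now defines __SYSCALL_WITH_COMPAT() before including the header. A hedged sketch of what the generated file looks like (entries shown are illustrative):

        /* syscall_table_64.h, as emitted by scripts/syscalltbl.sh */
        __SYSCALL(0, sys_restart_syscall)
        __SYSCALL(1, sys_exit)
        __SYSCALL_WITH_COMPAT(11, sys_execve, compat_sys_execve)

For the native table __SYSCALL_WITH_COMPAT() maps to the native entry; when building compat_sys_call_table it is redefined to pick the compat entry instead.
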
*/ - if (probe_kernel_read_inst(&op, (void *)(ip + 4))) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { pr_err("Fetching op failed.\n"); return -EFAULT; } @@ -349,7 +349,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; /* New trampoline -- read where this goes */ - if (probe_kernel_read_inst(&op, (void *)tramp)) { + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); return -1; } @@ -399,7 +399,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) struct ppc_inst op; /* Read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } @@ -526,10 +526,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) struct module *mod = rec->arch.mod; /* read where this goes */ - if (probe_kernel_read_inst(op, ip)) + if (copy_inst_from_kernel_nofault(op, ip)) return -EFAULT; - if (probe_kernel_read_inst(op + 1, ip + 4)) + if (copy_inst_from_kernel_nofault(op + 1, ip + 4)) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { @@ -592,7 +592,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long ip = rec->ip; /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) return -EFAULT; /* It should be pointing to a nop */ @@ -648,7 +648,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } /* Make sure we have a nop */ - if (probe_kernel_read_inst(&op, ip)) { + if (copy_inst_from_kernel_nofault(&op, ip)) { pr_err("Unable to read ftrace location %p\n", ip); return -EFAULT; } @@ -726,7 +726,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* read where this goes */ - if (probe_kernel_read_inst(&op, (void *)ip)) { + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); return -EFAULT; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a44a30b0688c..b4ab95c9e94a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -53,7 +53,6 @@ #ifdef CONFIG_PPC64 #include #include -#include #endif #include #include @@ -222,7 +221,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) return; crash_fadump(regs, "die oops"); @@ -290,7 +289,7 @@ void die(const char *str, struct pt_regs *regs, long err) /* * system_reset_excption handles debugger, crash dump, panic, for 0x100 */ - if (TRAP(regs) != 0x100) { + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) { if (debugger(regs)) return; } @@ -405,7 +404,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) * Now test if the interrupt has hit a range that may be using * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The * problem ranges all run un-relocated. Test real and virt modes - * at the same time by droping the high bit of the nip (virt mode + * at the same time by dropping the high bit of the nip (virt mode * entry points still have the +0x4000 offset). 
*/ nip &= ~0xc000000000000000ULL; @@ -864,7 +863,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) unsigned long ea, msr, msr_mask; bool swap; - if (__get_user_inatomic(instr, (unsigned int __user *)regs->nip)) + if (__get_user(instr, (unsigned int __user *)regs->nip)) return; /* @@ -1079,6 +1078,16 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) _exception(SIGTRAP, regs, TRAP_UNK, 0); } +DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, TRAP_UNK, 0); + + return 0; +} + DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, @@ -1309,7 +1318,6 @@ static int emulate_instruction(struct pt_regs *regs) if (!user_mode(regs)) return -EINVAL; - CHECK_FULL_REGS(regs); if (get_user(instword, (u32 __user *)(regs->nip))) return -EFAULT; @@ -1406,7 +1414,6 @@ int is_valid_bugaddr(unsigned long addr) static int emulate_math(struct pt_regs *regs) { int ret; - extern int do_mathemu(struct pt_regs *regs); ret = do_mathemu(regs); if (ret >= 0) @@ -1606,15 +1613,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) bad_page_fault(regs, sig); } -DEFINE_INTERRUPT_HANDLER(StackOverflow) -{ - pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", - current->comm, task_pid_nr(current), regs->gpr[1]); - debugger(regs); - show_regs(regs); - panic("kernel stack overflow"); -} - DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { die("Kernel stack overflow", regs, SIGSEGV); @@ -1693,7 +1691,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) u8 status; bool hv; - hv = (TRAP(regs) == 0xf80); + hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL); if (hv) value = mfspr(SPRN_HFSCR); else @@ -2170,11 +2168,14 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +void __noreturn unrecoverable_exception(struct pt_regs *regs) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); + /* die() should not return */ + for (;;) + ; } #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) @@ -2189,10 +2190,11 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? 
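
The watchdog handler just below is converted to the NMI flavour; NMI-class handlers are declared with DEFINE_INTERRUPT_HANDLER_NMI and return a value, unlike the plain and ASYNC variants. A minimal sketch of the shape such a handler takes (the handler name here is made up):

        DEFINE_INTERRUPT_HANDLER_NMI(example_nmi_handler)
        {
                pr_emerg("example NMI at %lx\n", regs->nip);
                return 0;       /* NMI handlers return long */
        }
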
*/ +DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); + return 0; } #endif diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index e8a63713e655..186f69b11e94 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, if (addr & 0x03) return -EINVAL; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + ppc_inst_prefixed(auprobe->insn) && + (addr & 0x3f) == 60) { + pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); + return -EINVAL; + } + return 0; } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e839a906fdf2..717f2c9a7573 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -50,15 +51,21 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - if (new_size != text_size + PAGE_SIZE) + if (new_size != text_size) return -EINVAL; - current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + current->mm->context.vdso = (void __user *)new_vma->vm_start; return 0; } @@ -73,6 +80,14 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf); + +static struct vm_special_mapping vvar_spec __ro_after_init = { + .name = "[vvar]", + .fault = vvar_fault, +}; + static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, @@ -83,17 +98,105 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { .mremap = vdso64_mremap, }; +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return ((struct vdso_arch_data *)vvar_page)->data; +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vvar_spec)) + zap_page_range(vma, vma->vm_start, size); + } + + mmap_read_unlock(mm); + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. 
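
The uprobes check added above is a small piece of boundary arithmetic: a prefixed instruction is two 4-byte words and must not cross a 64-byte boundary, and (addr & 0x3f) == 60 is exactly the case where the prefix occupies the last word of a 64-byte block (offsets 60-63), so the suffix would land in the next block. As a sketch, with an invented helper name:

        static bool prefixed_crosses_64b(unsigned long addr)
        {
                /* prefix at offset 60 => suffix would start at offset 64 of the next block */
                return (addr & 0x3f) == 60;
        }
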
+ * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = virt_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - struct mm_struct *mm = current->mm; + unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; + unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long vdso_size; - unsigned long vdso_base; if (is_32bit_task()) { vdso_spec = &vdso32_spec; @@ -110,8 +213,8 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vdso_base = 0; } - /* Add a page to the vdso size for the data page */ - vdso_size += PAGE_SIZE; + mappings_size = vdso_size + vvar_size; + mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK; /* * pick a base address for the vDSO in process space. We try to put it @@ -119,9 +222,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), - 0, 0); + vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0); if (IS_ERR_VALUE(vdso_base)) return vdso_base; @@ -133,7 +234,13 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * install_special_mapping or the perf counter mmap tracking code * will fail to recognise it as a vDSO. */ - mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; + mm->context.vdso = (void __user *)vdso_base + vvar_size; + + vma = _install_special_mapping(mm, vdso_base, vvar_size, + VM_READ | VM_MAYREAD | VM_IO | + VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + if (IS_ERR(vma)) + return PTR_ERR(vma); /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -145,9 +252,12 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int * It's fine to use that for setting breakpoints in the vDSO code * pages though. 
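
The resulting process layout places a two-page [vvar] area in front of the vDSO text, which is also why the linker scripts further down move _vdso_datapage to ". - 2 * PAGE_SIZE". A worked sketch of the layout, with addresses relative to the vdso_base chosen by get_unmapped_area():

        vdso_base + 0*PAGE_SIZE   [vvar] data page: vdso_data, or the time-namespace
                                  page when the task is in a time namespace
        vdso_base + 1*PAGE_SIZE   [vvar] timens page: the real vdso_data, faulted in
                                  only when a timens page exists (otherwise SIGBUS)
        vdso_base + 2*PAGE_SIZE   [vdso] text; mm->context.vdso points here
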
*/ - vma = _install_special_mapping(mm, vdso_base, vdso_size, + vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + if (IS_ERR(vma)) + do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR_OR_ZERO(vma); } @@ -249,10 +359,8 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) if (!pagelist) panic("%s: Cannot allocate page list for VDSO", __func__); - pagelist[0] = virt_to_page(vdso_data); - for (i = 0; i < pages; i++) - pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); return pagelist; } diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index a4b806b0d618..58e0099f70f4 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 2f3c359cacd3..0288cad428b0 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -17,7 +17,7 @@ ENTRY(_start) SECTIONS { - PROVIDE(_vdso_datapage = . - PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 801dc28fdcca..f5a52f444e36 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -67,9 +67,7 @@ _GLOBAL(load_up_altivec) #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ oris r9,r9,MSR_VEC@h -#ifdef CONFIG_VMAP_STACK tovirt(r5, r5) -#endif #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index c9a889880214..0196d0c211ac 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * The primary CPU waits a while for all secondary CPUs to enter. This is to @@ -336,7 +337,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * If we came in via system reset, wait a while for the secondary * CPUs to enter. */ - if (TRAP(regs) == 0x100) + if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e452158a18d7..c3e31fef0be1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -8,6 +8,7 @@ */ #include +#include #include #include @@ -133,6 +134,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, else kvmppc_mmu_flush_icache(pfn); + rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY); rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg; /* diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 13bad6bf4c95..4a532410e128 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -803,7 +803,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawrx1 = value2; return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: - /* KVM does not support mflags=2 (AIL=2) */ + /* + * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved. 
+ * Keep this in synch with kvmppc_filter_guest_lpcr_hv. + */ if (mflags != 0 && mflags != 3) return H_UNSUPPORTED_FLAG_START; return H_TOO_HARD; @@ -1635,6 +1638,41 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } +/* + * Enforce limits on guest LPCR values based on hardware availability, + * guest configuration, and possibly hypervisor support and security + * concerns. + */ +unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr) +{ + /* LPCR_TC only applies to HPT guests */ + if (kvm_is_radix(kvm)) + lpcr &= ~LPCR_TC; + + /* On POWER8 and above, userspace can modify AIL */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + lpcr &= ~LPCR_AIL; + if ((lpcr & LPCR_AIL) != LPCR_AIL_3) + lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */ + + /* + * On POWER9, allow userspace to enable large decrementer for the + * guest, whether or not the host has it enabled. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + lpcr &= ~LPCR_LD; + + return lpcr; +} + +static void verify_lpcr(struct kvm *kvm, unsigned long lpcr) +{ + if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) { + WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n", + lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr)); + } +} + static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { @@ -1643,6 +1681,23 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, u64 mask; spin_lock(&vc->lock); + + /* + * Userspace can only modify + * DPFD (default prefetch depth), ILE (interrupt little-endian), + * TC (translation control), AIL (alternate interrupt location), + * LD (large decrementer). + * These are subject to restrictions from kvmppc_filter_lcpr_hv(). + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; + + new_lpcr = kvmppc_filter_lpcr_hv(kvm, + (vc->lpcr & ~mask) | (new_lpcr & mask)); + /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. @@ -1661,25 +1716,8 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, } } - /* - * Userspace can only modify DPFD (default prefetch depth), - * ILE (interrupt little-endian) and TC (translation control). - * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). - */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - mask |= LPCR_AIL; - /* - * On POWER9, allow userspace to enable large decrementer for the - * guest, whether or not the host has it enabled. 
- */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - mask |= LPCR_LD; + vc->lpcr = new_lpcr; - /* Broken 32-bit version of LPCR must not clear top bits */ - if (preserve_top32) - mask &= 0xFFFFFFFF; - vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -3728,7 +3766,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + /* Save guest CTRL register, set runlatch to 1 */ vcpu->arch.ctrl = mfspr(SPRN_CTRLF); + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); vcpu->arch.iamr = mfspr(SPRN_IAMR); vcpu->arch.pspb = mfspr(SPRN_PSPB); @@ -3749,7 +3790,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DSCR, host_dscr); mtspr(SPRN_TIDR, host_tidr); mtspr(SPRN_IAMR, host_iamr); - mtspr(SPRN_PSPB, 0); if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr); @@ -4641,8 +4681,10 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask) struct kvmppc_vcore *vc = kvm->arch.vcores[i]; if (!vc) continue; + spin_lock(&vc->lock); vc->lpcr = (vc->lpcr & ~mask) | lpcr; + verify_lpcr(kvm, vc->lpcr); spin_unlock(&vc->lock); if (++cores_done >= kvm->arch.online_vcores) break; @@ -4970,6 +5012,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvmppc_setup_partition_table(kvm); } + verify_lpcr(kvm, lpcr); kvm->arch.lpcr = lpcr; /* Initialization for future HPT resizes */ @@ -5369,8 +5412,10 @@ static unsigned int default_hcall_list[] = { H_READ, H_PROTECT, H_BULK_REMOVE, +#ifdef CONFIG_SPAPR_TCE_IOMMU H_GET_TCE, H_PUT_TCE, +#endif H_SET_DABR, H_SET_XDABR, H_CEDE, diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 158d309b42a3..7a0e33a9c980 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -662,6 +662,9 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) { + /* Guest must always run with ME enabled, HV disabled. */ + msr = (msr | MSR_ME) & ~MSR_HV; + /* * Check for illegal transactional state bit combination * and if we find it, force the TS field to a safe state. diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 0cd0e7aad588..60724f674421 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -132,8 +132,33 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, } } +/* + * This can result in some L0 HV register state being leaked to an L1 + * hypervisor when the hv_guest_state is copied back to the guest after + * being modified here. + * + * There is no known problem with such a leak, and in many cases these + * register settings could be derived by the guest by observing behaviour + * and timing, interrupts, etc., but it is an issue to consider. + */ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (hr->lpcr & mask)); + /* * Don't let L1 enable features for L2 which we've disabled for L1, * but preserve the interrupt cause field. 
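
sanitise_hv_regs() above reuses the same recipe as kvmppc_set_lpcr(): combine the caller-supplied bits under the modifiable mask, then pass the result through the central filter so that anything the hardware or configuration cannot honour (AIL=1/2, LPCR_TC on radix, LPCR_LD before POWER9) is dropped. In sketch form, mirroring the calls in this patch rather than adding new API:

        u64 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
                   LPCR_LPES | LPCR_MER;
        unsigned long lpcr;

        lpcr = (vc->lpcr & ~mask) | (hr->lpcr & mask);          /* only maskable bits from L1 */
        hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, lpcr);      /* drop unsupported settings */
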
@@ -271,8 +296,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) u64 hv_ptr, regs_ptr; u64 hdec_exp; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; - u64 mask; - unsigned long lpcr; if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; @@ -320,10 +343,10 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; vcpu->arch.regs = l2_regs; - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask); + + /* Guest must always run with ME enabled, HV disabled. */ + vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; + sanitise_hv_regs(vcpu, &l2_hv); restore_hv_regs(vcpu, &l2_hv); @@ -335,7 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 88da2764c1bb..7af7c70f1468 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -673,8 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va) + unsigned long pte_index, unsigned long avpn) { struct kvm *kvm = vcpu->kvm; __be64 *hpte; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d4efc182662a..f2c690ee75d1 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,7 +16,7 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index b895166afc82..f3999cbb2fcc 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -16,16 +16,12 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, { __wsum csum; - might_sleep(); - - if (unlikely(!access_ok(src, len))) + if (unlikely(!user_read_access_begin(src, len))) return 0; - allow_read_from_user(src, len); - csum = csum_partial_copy_generic((void __force *)src, dst, len); - prevent_read_from_user(src, len); + user_read_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -34,15 +30,12 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len) { __wsum csum; - might_sleep(); - if (unlikely(!access_ok(dst, len))) + if (unlikely(!user_write_access_begin(dst, len))) return 0; - allow_write_to_user(dst, len); - csum = csum_partial_copy_generic(src, (void __force *)dst, len); - prevent_write_to_user(dst, len); + user_write_access_end(); return csum; } EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 2333625b5e31..870b30d9be2f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,10 +21,15 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, struct ppc_inst *patch_addr) { - if 
(!ppc_inst_prefixed(instr)) - __put_user_asm_goto(ppc_inst_val(instr), patch_addr, failed, "stw"); - else - __put_user_asm_goto(ppc_inst_as_u64(instr), patch_addr, failed, "std"); + if (!ppc_inst_prefixed(instr)) { + u32 val = ppc_inst_val(instr); + + __put_kernel_nofault(patch_addr, &val, u32, failed); + } else { + u64 val = ppc_inst_as_ulong(instr); + + __put_kernel_nofault(patch_addr, &val, u64, failed); + } asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), "r" (exec_addr)); diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c deleted file mode 100644 index 9cc17eb62462..000000000000 --- a/arch/powerpc/lib/inst.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2020, IBM Corporation. - */ - -#include -#include -#include -#include - -#ifdef CONFIG_PPC64 -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val, suffix; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (err) - return err; - if (get_op(val) == OP_PREFIX) { - err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} -#else /* !CONFIG_PPC64 */ -int probe_user_read_inst(struct ppc_inst *inst, - struct ppc_inst __user *nip) -{ - unsigned int val; - int err; - - err = copy_from_user_nofault(&val, nip, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} - -int probe_kernel_read_inst(struct ppc_inst *inst, - struct ppc_inst *src) -{ - unsigned int val; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (!err) - *inst = ppc_inst(val); - - return err; -} -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c6aebc149d14..45bda2520755 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1401,10 +1401,6 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; } - /* Following cases refer to regs->gpr[], so we need all regs */ - if (!FULL_REGS(regs)) - return -1; - rd = (word >> 21) & 0x1f; ra = (word >> 16) & 0x1f; rb = (word >> 11) & 0x1f; @@ -3086,15 +3082,6 @@ NOKPROBE_SYMBOL(analyse_instr); */ static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel stack overflow - */ - if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); - return -EINVAL; - } -#endif /* CONFIG_PPC32 */ /* * Check if we already set since that means we'll * lose the previous value. 
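
__patch_instruction() above now uses the generic nofault kernel accessors, which take a goto label just like the user-side unsafe helpers; a prefixed instruction simply becomes one 64-bit store instead of two 32-bit ones. A self-contained sketch with an invented function name (the real patcher additionally follows the store with dcbst/sync/icbi/isync so the change becomes visible to instruction fetch):

        static int example_write_nop(u32 *addr)
        {
                u32 nop = 0x60000000;   /* PPC_INST_NOP: ori 0,0,0 */

                __put_kernel_nofault(addr, &nop, u32, failed);
                return 0;

        failed:
                return -EFAULT;
        }
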
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 30b4b69c6941..327165f26ca6 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -225,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag) int do_mathemu(struct pt_regs *regs) { - void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0; + void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL; unsigned long pc = regs->nip; signed short sdisp; u32 insn = 0; @@ -234,7 +234,7 @@ do_mathemu(struct pt_regs *regs) int type = 0; int eflag, trap; - if (get_user(insn, (u32 *)pc)) + if (get_user(insn, (u32 __user *)pc)) return -EFAULT; switch (insn >> 26) { diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3b4e9e4e25ea..c3df3a8501d4 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -8,7 +8,8 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ - init-common.o mmu_context.o drmem.o + init-common.o mmu_context.o drmem.o \ + cacheflush.o obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 446d9de88ce4..7f0c8a78ba0c 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,3 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o +obj-$(CONFIG_PPC_KUEP) += kuep.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 0e6dc830c38b..fb4233a5bdf7 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -140,10 +140,6 @@ _GLOBAL(hash_page) bne- .Lretry /* retry if someone got there first */ mfsrin r3,r4 /* get segment reg for segment */ -#ifndef CONFIG_VMAP_STACK - mfctr r0 - stw r0,_CTR(r11) -#endif bl create_hpte /* add the hash table entry */ #ifdef CONFIG_SMP @@ -152,17 +148,7 @@ _GLOBAL(hash_page) li r0,0 stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8) #endif - -#ifdef CONFIG_VMAP_STACK b fast_hash_page_return -#else - /* Return from the exception */ - lwz r5,_CTR(r11) - mtctr r5 - lwz r0,GPR0(r11) - lwz r8,GPR8(r11) - b fast_exception_return -#endif #ifdef CONFIG_SMP .Lhash_page_out: diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c new file mode 100644 index 000000000000..8ed1b8634839 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ + if (TASK_SIZE > ((n) << 28)) \ + mtsr(val1, (n) << 28); \ + if (TASK_SIZE > (((n) + 1) << 28)) \ + mtsr(val2, ((n) + 1) << 28); \ + val1 = (val1 + 0x222) & 0xf0ffffff; \ + val2 = (val2 + 0x222) & 0xf0ffffff; \ +} while (0) + +static __always_inline void kuep_update(u32 val) +{ + int val1 = val; + int val2 = (val + 0x111) & 0xf0ffffff; + + KUEP_UPDATE_TWO_USER_SEGMENTS(0); + KUEP_UPDATE_TWO_USER_SEGMENTS(2); + KUEP_UPDATE_TWO_USER_SEGMENTS(4); + KUEP_UPDATE_TWO_USER_SEGMENTS(6); + KUEP_UPDATE_TWO_USER_SEGMENTS(8); + KUEP_UPDATE_TWO_USER_SEGMENTS(10); + KUEP_UPDATE_TWO_USER_SEGMENTS(12); + KUEP_UPDATE_TWO_USER_SEGMENTS(14); +} + +void kuep_lock(void) +{ + kuep_update(mfsr(0) | SR_NX); +} + +void kuep_unlock(void) +{ + kuep_update(mfsr(0) & ~SR_NX); +} diff --git 
a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index d7eb266a3f7a..159930351d9f 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -162,7 +162,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; - if (debug_pagealloc_enabled() || __map_without_bats) { + if (debug_pagealloc_enabled_or_kfence() || __map_without_bats) { pr_debug_once("Read-Write memory mapped without BATs\n"); if (base >= border) return base; @@ -184,17 +184,10 @@ static bool is_module_segment(unsigned long addr) { if (!IS_ENABLED(CONFIG_MODULES)) return false; -#ifdef MODULES_VADDR if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M)) return false; if (addr > ALIGN(MODULES_END, SZ_256M) - 1) return false; -#else - if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) - return false; - if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1) - return false; -#endif return true; } diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 567e0c6b3978..ad5eff097d31 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -400,10 +401,103 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX + +struct change_memory_parms { + unsigned long start, end, newpp; + unsigned int step, nr_cpus, master_cpu; + atomic_t cpu_counter; +}; + +// We'd rather this was on the stack but it has to be in the RMO +static struct change_memory_parms chmem_parms; + +// And therefore we need a lock to protect it from concurrent use +static DEFINE_MUTEX(chmem_lock); + +static void change_memory_range(unsigned long start, unsigned long end, + unsigned int step, unsigned long newpp) +{ + unsigned long idx; + + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); +} + +static int notrace chmem_secondary_loop(struct change_memory_parms *parms) +{ + unsigned long msr, tmp, flags; + int *p; + + p = &parms->cpu_counter.counter; + + local_irq_save(flags); + hard_irq_disable(); + + asm volatile ( + // Switch to real mode and leave interrupts off + "mfmsr %[msr] ;" + "li %[tmp], %[MSR_IR_DR] ;" + "andc %[tmp], %[msr], %[tmp] ;" + "mtmsrd %[tmp] ;" + + // Tell the master we are in real mode + "1: " + "lwarx %[tmp], 0, %[p] ;" + "addic %[tmp], %[tmp], -1 ;" + "stwcx. 
%[tmp], 0, %[p] ;" + "bne- 1b ;" + + // Spin until the counter goes to zero + "2: ;" + "lwz %[tmp], 0(%[p]) ;" + "cmpwi %[tmp], 0 ;" + "bne- 2b ;" + + // Switch back to virtual mode + "mtmsrd %[msr] ;" + + : // outputs + [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p) + : // inputs + [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR) + : // clobbers + "cc", "xer" + ); + + local_irq_restore(flags); + + return 0; +} + +static int change_memory_range_fn(void *data) +{ + struct change_memory_parms *parms = data; + + if (parms->master_cpu != smp_processor_id()) + return chmem_secondary_loop(parms); + + // Wait for all but one CPU (this one) to call-in + while (atomic_read(&parms->cpu_counter) > 1) + barrier(); + + change_memory_range(parms->start, parms->end, parms->step, parms->newpp); + + mb(); + + // Signal the other CPUs that we're done + atomic_dec(&parms->cpu_counter); + + return 0; +} + static bool hash__change_memory_range(unsigned long start, unsigned long end, unsigned long newpp) { - unsigned long idx; unsigned int step, shift; shift = mmu_psize_defs[mmu_linear_psize].shift; @@ -415,25 +509,43 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end, if (start >= end) return false; - pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", - start, end, newpp, step); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + mutex_lock(&chmem_lock); - for (idx = start; idx < end; idx += step) - /* Not sure if we can do much with the return value */ - mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, - mmu_kernel_ssize); + chmem_parms.start = start; + chmem_parms.end = end; + chmem_parms.step = step; + chmem_parms.newpp = newpp; + chmem_parms.master_cpu = smp_processor_id(); + + cpus_read_lock(); + + atomic_set(&chmem_parms.cpu_counter, num_online_cpus()); + + // Ensure state is consistent before we call the other CPUs + mb(); + + stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms, + cpu_online_mask); + + cpus_read_unlock(); + mutex_unlock(&chmem_lock); + } else + change_memory_range(start, end, step, newpp); return true; } void hash__mark_rodata_ro(void) { - unsigned long start, end; + unsigned long start, end, pp; start = (unsigned long)_stext; end = (unsigned long)__init_begin; - WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } void hash__mark_initmem_nx(void) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 581b20a2feaf..96d9aa164007 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -338,7 +338,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { - unsigned long vaddr; + unsigned long vaddr, time_limit; unsigned int step, shift; int rc; int ret = 0; @@ -351,8 +351,19 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, /* Unmap the full range specificied */ vaddr = ALIGN_DOWN(vstart, step); + time_limit = jiffies + HZ; + for (;vaddr < vend; vaddr += step) { rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); + + /* + * For large number of mappings introduce a cond_resched() + * to prevent softlockup warnings. 
+ */ + if (time_after(jiffies, time_limit)) { + cond_resched(); + time_limit = jiffies + HZ; + } if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -1145,7 +1156,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) /* page is dirty */ if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { - if (trap == 0x400) { + if (trap == INTERRUPT_INST_STORAGE) { flush_dcache_icache_page(page); set_bit(PG_dcache_clean, &page->flags); } else @@ -1545,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (regs->trap == 0x400) + if (TRAP(regs) == INTERRUPT_INST_STORAGE) access |= _PAGE_EXEC; - err = hash_page_mm(mm, ea, access, regs->trap, flags); + err = hash_page_mm(mm, ea, access, TRAP(regs), flags); if (unlikely(err < 0)) { // failed to instert a hash PTE due to an hypervisor error if (user_mode(regs)) { @@ -1572,10 +1583,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; - long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return 0; + } /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then @@ -1595,13 +1607,10 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - err = __do_hash_fault(regs); - if (err) { -page_fault: - err = hash__do_page_fault(regs); - } + if (__do_hash_fault(regs)) + hash__do_page_fault(regs); - return err; + return 0; } #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 0c8557220ae2..c10fc8a72fb3 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -119,7 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) /* This is fork. Copy hash_context details from current->mm */ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); #ifdef CONFIG_PPC_SUBPAGE_PROT - /* inherit subpage prot detalis if we have one. */ + /* inherit subpage prot details if we have one. 
*/ if (current->mm->context.hash_context->spt) { mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 15dcc5ad91c5..a2d9ad138709 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -301,19 +301,6 @@ void setup_kuap(bool disabled) } #endif -static inline void update_current_thread_amr(u64 value) -{ - current->thread.regs->amr = value; -} - -static inline void update_current_thread_iamr(u64 value) -{ - if (!likely(pkey_execute_disable_supported)) - return; - - current->thread.regs->iamr = value; -} - #ifdef CONFIG_PPC_MEM_KEYS void pkey_mm_init(struct mm_struct *mm) { @@ -328,7 +315,7 @@ static inline void init_amr(int pkey, u8 init_bits) u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); - update_current_thread_amr(old_amr | new_amr_bits); + current->thread.regs->amr = old_amr | new_amr_bits; } static inline void init_iamr(int pkey, u8 init_bits) @@ -336,7 +323,10 @@ static inline void init_iamr(int pkey, u8 init_bits) u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); - update_current_thread_iamr(old_iamr | new_iamr_bits); + if (!likely(pkey_execute_disable_supported)) + return; + + current->thread.regs->iamr = old_iamr | new_iamr_bits; } /* diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 98f0b243c1ab..50d536ecc89b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -108,7 +108,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -168,7 +168,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -180,8 +180,8 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__change_memory_range(unsigned long start, unsigned long end, - unsigned long clear) +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { unsigned long idx; pgd_t *pgdp; @@ -1058,7 +1058,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, * Book3S does not require a TLB flush when relaxing access * restrictions when the address space is not attached to a * NMMU, because the core MMU will reload the pte after taking - * an access fault, which is defined by the architectue. + * an access fault, which is defined by the architecture. */ } /* See ptesync comment in radix__set_pte_at */ diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c new file mode 100644 index 000000000000..63363787e000 --- /dev/null +++ b/arch/powerpc/mm/cacheflush.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(void) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. 
We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + icbi((void *)PAGE_OFFSET); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache()) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. + */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#ifdef CONFIG_HIGHMEM +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +NOKPROBE_SYMBOL(flush_dcache_icache_phys) +#else +static void flush_dcache_icache_phys(unsigned long physaddr) +{ +} +#endif + +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @p: the address of the page to flush + */ +static void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p & PAGE_MASK; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. 
+ */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + +static void flush_dcache_icache_hugepage(struct page *page) +{ + int i; + int nr = compound_nr(page); + + if (!PageHighMem(page)) { + for (i = 0; i < nr; i++) + __flush_dcache_icache(lowmem_page_address(page + i)); + } else { + for (i = 0; i < nr; i++) { + void *start = kmap_local_page(page + i); + + __flush_dcache_icache(start); + kunmap_local(start); + } + } +} + +void flush_dcache_icache_page(struct page *page) +{ + if (flush_coherent_icache()) + return; + + if (PageCompound(page)) + return flush_dcache_icache_hugepage(page); + + if (!PageHighMem(page)) { + __flush_dcache_icache(lowmem_page_address(page)); + } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { + void *start = kmap_local_page(page); + + __flush_dcache_icache(start); + kunmap_local(start); + } else { + flush_dcache_icache_phys(page_to_phys(page)); + } +} +EXPORT_SYMBOL(flush_dcache_icache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + /* + * We shouldn't have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + flush_dcache_page(pg); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. 
+ * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + flush_dcache_page(pg); +} + +void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + void *maddr; + + maddr = kmap_local_page(page) + (addr & ~PAGE_MASK); + flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len); + kunmap_local(maddr); +} diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index bb368257b55c..34f641d4a2fe 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include @@ -87,7 +89,6 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } -#ifdef CONFIG_PPC_MEM_KEYS static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma) { @@ -127,7 +128,6 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, return 0; } -#endif static noinline int bad_access(struct pt_regs *regs, unsigned long address) { @@ -197,7 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address, bool is_write) { - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | @@ -234,7 +234,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -#ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) { @@ -248,7 +247,6 @@ static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, return false; } -#endif static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) { @@ -393,7 +391,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_DEFAULT; - int is_exec = TRAP(regs) == 0x400; + int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; @@ -418,8 +416,12 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, * take a page fault to a kernel address or a page fault to a user * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { + if (kfence_handle_page_fault(address, is_write, regs)) + return 0; + return SIGSEGV; + } /* * If we're in an interrupt, have no user context or are running @@ -492,11 +494,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, return bad_area(regs, address); } -#ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) return bad_access_pkey(regs, address, vma); -#endif /* CONFIG_PPC_MEM_KEYS */ if (unlikely(access_error(is_write, is_exec, vma))) return bad_access(regs, address); @@ -539,39 +539,25 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, } NOKPROBE_SYMBOL(___do_page_fault); -static long __do_page_fault(struct pt_regs *regs) +static 
__always_inline void __do_page_fault(struct pt_regs *regs) { - const struct exception_table_entry *entry; long err; err = ___do_page_fault(regs, regs->dar, regs->dsisr); - if (likely(!err)) - return err; - - entry = search_exception_tables(regs->nip); - if (likely(entry)) { - instruction_pointer_set(regs, extable_fixup(entry)); - return 0; - } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - __bad_page_fault(regs, err); - return 0; - } else { - /* 32 and 64e handle the bad page fault in asm */ - return err; - } + if (unlikely(err)) + bad_page_fault(regs, err); } -NOKPROBE_SYMBOL(__do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +DEFINE_INTERRUPT_HANDLER(do_page_fault) { - return __do_page_fault(regs); + __do_page_fault(regs); } #ifdef CONFIG_PPC_BOOK3S_64 /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ -long hash__do_page_fault(struct pt_regs *regs) +void hash__do_page_fault(struct pt_regs *regs) { - return __do_page_fault(regs); + __do_page_fault(regs); } NOKPROBE_SYMBOL(hash__do_page_fault); #endif @@ -581,27 +567,27 @@ NOKPROBE_SYMBOL(hash__do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, int sig) +static void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); /* kernel has accessed a bad area */ switch (TRAP(regs)) { - case 0x300: - case 0x380: - case 0xe00: + case INTERRUPT_DATA_STORAGE: + case INTERRUPT_DATA_SEGMENT: + case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : "Unable to handle kernel data access", is_write ? "write" : "read", regs->dar); break; - case 0x400: - case 0x480: + case INTERRUPT_INST_STORAGE: + case INTERRUPT_INST_SEGMENT: pr_alert("BUG: Unable to handle kernel instruction fetch%s", regs->nip < PAGE_SIZE ? 
" (NULL pointer?)\n" : "\n"); break; - case 0x600: + case INTERRUPT_ALIGNMENT: pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 02c7db4087cb..3d690be48e84 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -97,6 +97,9 @@ static void __init MMU_setup(void) if (IS_ENABLED(CONFIG_PPC_8xx)) return; + if (IS_ENABLED(CONFIG_KFENCE)) + __map_without_ltlbs = 1; + if (debug_pagealloc_enabled()) __map_without_ltlbs = 1; diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index fa9a7a718fc6..a3c30a884076 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -3,7 +3,28 @@ #include #include +#include +#include +#include + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } + +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +{ + unsigned int val, suffix; + int err; + + err = copy_from_kernel_nofault(&val, src, sizeof(val)); + if (err) + return err; + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return err; +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4e8ce6d85232..6564b4d81324 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -12,49 +12,18 @@ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include #include -static DEFINE_MUTEX(linear_mapping_mutex); unsigned long long memory_limit; bool init_mem_is_free; @@ -72,6 +41,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, EXPORT_SYMBOL(phys_mem_access_prot); #ifdef CONFIG_MEMORY_HOTPLUG +static DEFINE_MUTEX(linear_mapping_mutex); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) @@ -340,257 +310,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -/** - * flush_coherent_icache() - if a CPU has a coherent icache, flush it - * @addr: The base address to use (can be any valid address, the whole cache will be flushed) - * Return true if the cache was flushed, false otherwise - */ -static inline bool flush_coherent_icache(unsigned long addr) -{ - /* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. 
- */ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { - mb(); /* sync */ - allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - icbi((void *)addr); - prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); - mb(); /* sync */ - isync(); - return true; - } - - return false; -} - -/** - * invalidate_icache_range() - Flush the icache by issuing icbi across an address range - * @start: the start address - * @stop: the stop address (exclusive) - */ -static void invalidate_icache_range(unsigned long start, unsigned long stop) -{ - unsigned long shift = l1_icache_shift(); - unsigned long bytes = l1_icache_bytes(); - char *addr = (char *)(start & ~(bytes - 1)); - unsigned long size = stop - (unsigned long)addr + (bytes - 1); - unsigned long i; - - for (i = 0; i < size >> shift; i++, addr += bytes) - icbi(addr); - - mb(); /* sync */ - isync(); -} - -/** - * flush_icache_range: Write any modified data cache blocks out to memory - * and invalidate the corresponding blocks in the instruction cache - * - * Generic code will call this after writing memory, before executing from it. - * - * @start: the start address - * @stop: the stop address (exclusive) - */ -void flush_icache_range(unsigned long start, unsigned long stop) -{ - if (flush_coherent_icache(start)) - return; - - clean_dcache_range(start, stop); - - if (IS_ENABLED(CONFIG_44x)) { - /* - * Flash invalidate on 44x because we are passed kmapped - * addresses and this doesn't work for userspace pages due to - * the virtually tagged icache. - */ - iccci((void *)start); - mb(); /* sync */ - isync(); - } else - invalidate_icache_range(start, stop); -} -EXPORT_SYMBOL(flush_icache_range); - -#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) -/** - * flush_dcache_icache_phys() - Flush a page by it's physical address - * @physaddr: the physical address of the page - */ -static void flush_dcache_icache_phys(unsigned long physaddr) -{ - unsigned long bytes = l1_dcache_bytes(); - unsigned long nb = PAGE_SIZE / bytes; - unsigned long addr = physaddr & PAGE_MASK; - unsigned long msr, msr0; - unsigned long loop1 = addr, loop2 = addr; - - msr0 = mfmsr(); - msr = msr0 & ~MSR_DR; - /* - * This must remain as ASM to prevent potential memory accesses - * while the data MMU is disabled - */ - asm volatile( - " mtctr %2;\n" - " mtmsr %3;\n" - " isync;\n" - "0: dcbst 0, %0;\n" - " addi %0, %0, %4;\n" - " bdnz 0b;\n" - " sync;\n" - " mtctr %2;\n" - "1: icbi 0, %1;\n" - " addi %1, %1, %4;\n" - " bdnz 1b;\n" - " sync;\n" - " mtmsr %5;\n" - " isync;\n" - : "+&r" (loop1), "+&r" (loop2) - : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) - : "ctr", "memory"); -} -NOKPROBE_SYMBOL(flush_dcache_icache_phys) -#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) - -/* - * This is called when a page has been modified by the kernel. - * It just marks the page as not i-cache clean. We do the i-cache - * flush later when the page is given to a user process, if necessary. 
- */ -void flush_dcache_page(struct page *page) -{ - if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - return; - /* avoid an atomic op if possible */ - if (test_bit(PG_dcache_clean, &page->flags)) - clear_bit(PG_dcache_clean, &page->flags); -} -EXPORT_SYMBOL(flush_dcache_page); - -static void flush_dcache_icache_hugepage(struct page *page) -{ - int i; - void *start; - - BUG_ON(!PageCompound(page)); - - for (i = 0; i < compound_nr(page); i++) { - if (!PageHighMem(page)) { - __flush_dcache_icache(page_address(page+i)); - } else { - start = kmap_atomic(page+i); - __flush_dcache_icache(start); - kunmap_atomic(start); - } - } -} - -void flush_dcache_icache_page(struct page *page) -{ - - if (PageCompound(page)) - return flush_dcache_icache_hugepage(page); - -#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) - /* On 8xx there is no need to kmap since highmem is not supported */ - __flush_dcache_icache(page_address(page)); -#else - if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) { - void *start = kmap_atomic(page); - __flush_dcache_icache(start); - kunmap_atomic(start); - } else { - unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; - - if (flush_coherent_icache(addr)) - return; - flush_dcache_icache_phys(addr); - } -#endif -} -EXPORT_SYMBOL(flush_dcache_icache_page); - -/** - * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * @page: the address of the page to flush - */ -void __flush_dcache_icache(void *p) -{ - unsigned long addr = (unsigned long)p; - - if (flush_coherent_icache(addr)) - return; - - clean_dcache_range(addr, addr + PAGE_SIZE); - - /* - * We don't flush the icache on 44x. Those have a virtual icache and we - * don't have access to the virtual address here (it's not the page - * vaddr but where it's mapped in user space). The flushing of the - * icache on these is handled elsewhere, when a change in the address - * space occurs, before returning to user space. - */ - - if (mmu_has_feature(MMU_FTR_TYPE_44x)) - return; - - invalidate_icache_range(addr, addr + PAGE_SIZE); -} - -void clear_user_page(void *page, unsigned long vaddr, struct page *pg) -{ - clear_page(page); - - /* - * We shouldn't have to do this, but some versions of glibc - * require it (ld.so assumes zero filled pages are icache clean) - * - Anton - */ - flush_dcache_page(pg); -} -EXPORT_SYMBOL(clear_user_page); - -void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, - struct page *pg) -{ - copy_page(vto, vfrom); - - /* - * We should be able to use the following optimisation, however - * there are two problems. - * Firstly a bug in some versions of binutils meant PLT sections - * were not marked executable. - * Secondly the first word in the GOT section is blrl, used - * to establish the GOT address. Until recently the GOT was - * not marked executable. - * - Anton - */ -#if 0 - if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) - return; -#endif - - flush_dcache_page(pg); -} - -void flush_icache_user_page(struct vm_area_struct *vma, struct page *page, - unsigned long addr, int len) -{ - unsigned long maddr; - - maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); - flush_icache_range(maddr, maddr + len); - kunmap(page); -} - /* * System memory should not be in /proc/iomem but various tools expect it * (eg kdump). 
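For reference, the flush helpers removed from mem.c here are the ones re-added, largely unchanged, in the new arch/powerpc/mm/cacheflush.c earlier in this patch. The contract is unchanged: after the kernel stores instructions (module loading, BPF JIT, code patching), the modified data-cache lines must be written back and the corresponding instruction-cache lines invalidated before the new code is executed. A rough sketch of that two-pass walk, assuming a fixed cache-line size where the real code queries l1_dcache_bytes()/l1_icache_bytes() at runtime; the cache ops themselves are PowerPC instructions and are shown only as comments:

#include <stdint.h>

#define LINE_BYTES 128UL	/* assumed line size; the kernel reads it per-CPU */

static void flush_icache_range_sketch(uintptr_t start, uintptr_t stop)
{
	uintptr_t addr;

	/* Pass 1: push modified data-cache lines back to memory. */
	for (addr = start & ~(LINE_BYTES - 1); addr < stop; addr += LINE_BYTES)
		;	/* dcbst addr */
	/* sync: make those writebacks visible before the icbi pass */

	/* Pass 2: discard any stale instruction-cache lines for the range. */
	for (addr = start & ~(LINE_BYTES - 1); addr < stop; addr += LINE_BYTES)
		;	/* icbi addr */
	/* sync; isync: no stale prefetched instructions survive past here */
}

On CPUs with CPU_FTR_COHERENT_ICACHE the whole walk is unnecessary and a single dummy icbi plus barriers suffices, which is what flush_coherent_icache() checks for first.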
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d348..a857af401738 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,24 +43,26 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * This full barrier orders the store to the cpumask above vs - * a subsequent operation which allows this CPU to begin loading - * translations for next. + * a subsequent load which allows this CPU/MMU to begin loading + * translations for 'next' from page table PTEs into the TLB. * - * When using the radix MMU that operation is the load of the + * When using the radix MMU, that operation is the load of the * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or the store of paca->mm_ctx_id in - * copy_mm_to_paca(). + * in switch_slb() to preload the SLBs, or the load of + * get_user_context which loads the context for the VSID hash + * to insert a new SLB, in the SLB fault handler. * * On the other side, the barrier is in mm/tlb-radix.c for - * radix which orders earlier stores to clear the PTEs vs - * the load of mm_cpumask. And pte_xchg which does the same - * thing for hash. + * radix which orders earlier stores to clear the PTEs before + * the load of mm_cpumask to check which CPU TLBs should be + * flushed. For hash, pte_xchg to clear the PTE includes the + * barrier. * - * This full barrier is needed by membarrier when switching - * between processes after store to rq->curr, before user-space - * memory accesses. + * This full barrier is also needed by membarrier when + * switching between processes after store to rq->curr, before + * user-space memory accesses. */ smp_mb(); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 998810e68562..7dac910c0b21 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -185,3 +185,8 @@ void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } #endif + +static inline bool debug_pagealloc_enabled_or_kfence(void) +{ + return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled(); +} diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 19a3eec1d8c5..71bfdbedacee 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -149,7 +149,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); - bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled(); + bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence(); unsigned long boundary = strict_boundary ? 
sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); @@ -161,7 +161,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return 0; mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, true); - if (debug_pagealloc_enabled()) { + if (debug_pagealloc_enabled_or_kfence()) { top = boundary; } else { mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_TEXT, true); diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index c2dec3a68d4c..8e60af32e51e 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -2,8 +2,4 @@ # # Arch-specific network modules # -ifdef CONFIG_PPC64 -obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o -else -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o -endif +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf1..99fad093f43e 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -26,6 +26,9 @@ /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +/* blr; (unconditional 'branch' with link) to absolute address */ +#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ + (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ (((cond) & 0x3ff) << 16) | \ @@ -42,6 +45,10 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) +#ifdef CONFIG_PPC32 +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) +#endif + #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -108,6 +115,63 @@ static inline bool is_nearbranch(int offset) #define COND_LT (CR0_LT | COND_CMP_TRUE) #define COND_LE (CR0_GT | COND_CMP_FALSE) +#define SEEN_FUNC 0x20000000 /* might call external helpers */ +#define SEEN_STACK 0x40000000 /* uses BPF stack */ +#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ + +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ + +#ifdef CONFIG_PPC64 +extern const int b2p[MAX_BPF_JIT_REG + 2]; +#else +extern const int b2p[MAX_BPF_JIT_REG + 1]; +#endif + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. 
+ * - register usage is tracked with corresponding + * bits (r3-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 0 to 2 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; + unsigned int stack_size; + int b2p[ARRAY_SIZE(b2p)]; +}; + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); /* smp write barrier */ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return ctx->seen & (1 << (31 - i)); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= 1 << (31 - i); +} + +static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen &= ~(1 << (31 - i)); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass); +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void bpf_jit_realloc_regs(struct codegen_context *ctx); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h deleted file mode 100644 index 448dfd4d98e1..000000000000 --- a/arch/powerpc/net/bpf_jit32.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit32.h: BPF JIT compiler for PPC - * - * Copyright 2011 Matt Evans , IBM Corporation - * - * Split from bpf_jit.h - */ -#ifndef _BPF_JIT32_H -#define _BPF_JIT32_H - -#include -#include "bpf_jit.h" - -#ifdef CONFIG_PPC64 -#define BPF_PPC_STACK_R3_OFF 48 -#define BPF_PPC_STACK_LOCALS 32 -#define BPF_PPC_STACK_BASIC (48+64) -#define BPF_PPC_STACK_SAVE (18*8) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (48+64) -#else -#define BPF_PPC_STACK_R3_OFF 24 -#define BPF_PPC_STACK_LOCALS 16 -#define BPF_PPC_STACK_BASIC (24+32) -#define BPF_PPC_STACK_SAVE (18*4) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (24+32) -#endif - -#define REG_SZ (BITS_PER_LONG/8) - -/* - * Generated code register usage: - * - * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: - * - * skb r3 (Entry parameter) - * A register r4 - * X register r5 - * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) - * m[0] r16 - * m[...] ... 
- * m[15] r31 - */ -#define r_skb 3 -#define r_ret 3 -#define r_A 4 -#define r_X 5 -#define r_addr 6 -#define r_scratch1 7 -#define r_scratch2 8 -#define r_D 14 -#define r_HL 15 -#define r_M 16 - -#ifndef __ASSEMBLY__ - -/* - * Assembly helpers from arch/powerpc/net/bpf_jit.S: - */ -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func[], func##_negative_offset[], func##_positive_offset[] - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); -DECLARE_LOAD_FUNC(sk_load_byte_msh); - -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) - -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) - -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ - else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ - EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) - -#ifdef CONFIG_PPC64 -#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) -#else -#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) -#endif - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \ - PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ - } while (0) -#else -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \ - PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \ - } while(0) -#endif -#else -#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) -#endif - -#define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) -#ifdef __LITTLE_ENDIAN__ -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) -#else -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) -#endif - -#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) -#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) - -#define SEEN_DATAREF 0x10000 /* might call external helpers */ -#define SEEN_XREG 0x20000 /* X reg is used */ -#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<, IBM Corporation - */ - -#include -#include -#include "bpf_jit32.h" - -/* - * All of these routines are called directly from generated code, - * whose register usage is: - * - * r3 skb - * r4,r5 A,X - * r6 *** address parameter to helper *** - * r7-r10 scratch - * r14 skb->data - * r15 skb headlen - * r16-31 M[] - */ - -/* - * To consider: These helpers are so small it could be better to just - * generate them inline. Inline code can do the simple headlen check - * then branch directly to slow_path_XXX if required. (In fact, could - * load a spare GPR with the address of slow_path_generic and pass size - * as an argument, making the call site a mtlr, li and bllr.) - */ - .globl sk_load_word -sk_load_word: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_word_neg - .globl sk_load_word_positive_offset -sk_load_word_positive_offset: - /* Are we accessing past headlen? 
*/ - subi r_scratch1, r_HL, 4 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! */ -#ifdef __LITTLE_ENDIAN__ - lwbrx r_A, r_D, r_addr -#else - lwzx r_A, r_D, r_addr -#endif - blr /* Return success, cr0 != LT */ - - .globl sk_load_half -sk_load_half: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_half_neg - .globl sk_load_half_positive_offset -sk_load_half_positive_offset: - subi r_scratch1, r_HL, 2 - PPC_LCMP r_scratch1, r_addr - blt bpf_slow_path_half -#ifdef __LITTLE_ENDIAN__ - lhbrx r_A, r_D, r_addr -#else - lhzx r_A, r_D, r_addr -#endif - blr - - .globl sk_load_byte -sk_load_byte: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_neg - .globl sk_load_byte_positive_offset -sk_load_byte_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte - lbzx r_A, r_D, r_addr - blr - -/* - * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) - * r_addr is the offset value - */ - .globl sk_load_byte_msh -sk_load_byte_msh: - PPC_LCMPI r_addr, 0 - blt bpf_slow_path_byte_msh_neg - .globl sk_load_byte_msh_positive_offset -sk_load_byte_msh_positive_offset: - PPC_LCMP r_HL, r_addr - ble bpf_slow_path_byte_msh - lbzx r_X, r_D, r_addr - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to skb_copy_bits: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. - */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - addi r5, r1, BPF_PPC_STACK_BASIC+(2*REG_SZ); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* R3 = 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPI r3, 0; \ - blt bpf_error; /* cr0 = LT */ \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - lwz r_A, BPF_PPC_STACK_BASIC+(2*REG_SZ)(r1) - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1) - blr - -bpf_slow_path_byte_msh: - bpf_slow_path_common(1) - lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -/* Call out to bpf_internal_load_pointer_neg_helper: - * We'll need to back up our volatile regs first; we have - * local variable space at r1+(BPF_PPC_STACK_BASIC). - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. 
- */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - PPC_STL r0, PPC_LR_STKOFF(r1); \ - /* R3 goes in parameter space of caller's frame */ \ - PPC_STL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - PPC_STL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_STL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - PPC_STLU r1, -BPF_PPC_SLOWPATH_FRAME(r1); \ - /* R3 = r_skb, as passed */ \ - mr r4, r_addr; \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - /* R3 != 0 on success */ \ - addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \ - PPC_LL r0, PPC_LR_STKOFF(r1); \ - PPC_LL r_A, (BPF_PPC_STACK_BASIC+(0*REG_SZ))(r1); \ - PPC_LL r_X, (BPF_PPC_STACK_BASIC+(1*REG_SZ))(r1); \ - mtlr r0; \ - PPC_LCMPLI r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ \ - mr r_addr, r3; \ - PPC_LL r_skb, (BPF_PPC_STACKFRAME+BPF_PPC_STACK_R3_OFF)(r1); \ - /* Great success! */ - -bpf_slow_path_word_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_word_negative_offset -sk_load_word_negative_offset: - sk_negative_common(4) - lwz r_A, 0(r_addr) - blr - -bpf_slow_path_half_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_half_negative_offset -sk_load_half_negative_offset: - sk_negative_common(2) - lhz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_negative_offset -sk_load_byte_negative_offset: - sk_negative_common(1) - lbz r_A, 0(r_addr) - blr - -bpf_slow_path_byte_msh_neg: - lis r_scratch1,-32 /* SKF_LL_OFF */ - PPC_LCMP r_addr, r_scratch1 /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - .globl sk_load_byte_msh_negative_offset -sk_load_byte_msh_negative_offset: - sk_negative_common(1) - lbz r_X, 0(r_addr) - rlwinm r_X, r_X, 2, 32-4-2, 31-2 - blr - -bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_scratch1, -1 - PPC_LCMPI r_scratch1, 0 -bpf_error: - /* Entered with cr0 = lt */ - li r3, 0 - /* Generated code will 'blt epilogue', returning 0. */ - blr diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index e809cb5a1631..798ac4350a82 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -/* bpf_jit_comp.c: BPF JIT compiler +/* + * eBPF JIT compiler * - * Copyright 2011 Matt Evans , IBM Corporation + * Copyright 2016 Naveen N. 
Rao + * IBM Corporation * - * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com) - * Ported to ppc32 by Denis Kirjanov + * Based on the powerpc classic BPF JIT compiler by Matt Evans */ #include #include @@ -12,639 +13,204 @@ #include #include #include +#include +#include -#include "bpf_jit32.h" +#include "bpf_jit.h" -static inline void bpf_flush_icache(void *start, void *end) +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); + memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx) +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { - int i; - const struct sock_filter *filter = fp->insns; + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - /* Make stackframe */ - if (ctx->seen & SEEN_DATAREF) { - /* If we call any helpers (for loads), save LR */ - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); - - /* Back up non-volatile regs. */ - PPC_BPF_STL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_STL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* - * Conditionally save regs r15-r31 as some will be used - * for M[] data. - */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_STL(i, 1, -(REG_SZ*(32-i))); - } - } - PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); - } - - if (ctx->seen & SEEN_DATAREF) { + for (i = 0; i < fp->len; i++) { /* - * If this filter needs to access skb data, - * prepare r_D and r_HL: - * r_HL = skb->len - skb->data_len - * r_D = skb->data + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. */ - PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - data_len)); - PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); - PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); - } + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; - if (ctx->seen & SEEN_XREG) { - /* - * TODO: Could also detect whether first instr. sets X and - * avoid this (as below, with A). - */ - EMIT(PPC_RAW_LI(r_X, 0)); - } + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. 
+ */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); - /* make sure we dont leak kernel information to user */ - if (bpf_needs_clear_a(&filter[0])) - EMIT(PPC_RAW_LI(r_A, 0)); -} - -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - int i; - - if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); - if (ctx->seen & SEEN_DATAREF) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); - PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); - PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); - } - if (ctx->seen & SEEN_MEM) { - /* Restore any saved non-vol registers */ - for (i = r_M; i < (r_M+16); i++) { - if (ctx->seen & (1 << (i-r_M))) - PPC_BPF_LL(i, 1, -(REG_SZ*(32-i))); - } + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; } } - /* The RETs have left a return value in R3. */ - - EMIT(PPC_RAW_BLR()); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) - -/* Assemble the body code between the prologue & epilogue. */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - unsigned int *addrs) -{ - const struct sock_filter *filter = fp->insns; - int flen = fp->len; - u8 *func; - unsigned int true_cond; - int i; - - /* Start of epilogue code */ - unsigned int exit_addr = addrs[flen]; - - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; - u16 code = bpf_anc_helper(&filter[i]); - - /* - * addrs[] maps a BPF bytecode address into a real offset from - * the start of the body code. - */ - addrs[i] = ctx->idx * 4; - - switch (code) { - /*** ALU ops ***/ - case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); - break; - case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ - if (!K) - break; - EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); - break; - case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ - if (K < 32768) - EMIT(PPC_RAW_MULI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPWI(r_X, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); - EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - } else { - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); - } - break; - case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ - PPC_LI32(r_scratch2, K); - EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); - EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); - EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_DIV | 
BPF_K: /* A /= K */ - if (K == 1) - break; - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); - break; - case BPF_ALU | BPF_AND | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_AND | BPF_K: - if (!IMM_H(K)) - EMIT(PPC_RAW_ANDI(r_A, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); - } - break; - case BPF_ALU | BPF_OR | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_OR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_OR | BPF_K: - if (IMM_L(K)) - EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ - if (IMM_L(K)) - EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); - if (K >= 65536) - EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); - break; - case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_LSH | BPF_K: - if (K == 0) - break; - else - EMIT(PPC_RAW_SLWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); - break; - case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ - if (K == 0) - break; - else - EMIT(PPC_RAW_SRWI(r_A, r_A, K)); - break; - case BPF_ALU | BPF_NEG: - EMIT(PPC_RAW_NEG(r_A, r_A)); - break; - case BPF_RET | BPF_K: - PPC_LI32(r_ret, K); - if (!K) { - if (ctx->pc_ret0 == -1) - ctx->pc_ret0 = i; - } - /* - * If this isn't the very last instruction, branch to - * the epilogue if we've stuff to clean up. Otherwise, - * if there's nothing to tidy, just return. If we /are/ - * the last instruction, we're about to fall through to - * the epilogue to return. - */ - if (i != flen - 1) { - /* - * Note: 'seen' is properly valid only on pass - * #2. Both parts of this conditional are the - * same instruction size though, meaning the - * first pass will still correctly determine the - * code size/addresses. 
- */ - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_RET | BPF_A: - EMIT(PPC_RAW_MR(r_ret, r_A)); - if (i != flen - 1) { - if (ctx->seen) - PPC_JMP(exit_addr); - else - EMIT(PPC_RAW_BLR()); - } - break; - case BPF_MISC | BPF_TAX: /* X = A */ - EMIT(PPC_RAW_MR(r_X, r_A)); - break; - case BPF_MISC | BPF_TXA: /* A = X */ - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_MR(r_A, r_X)); - break; - - /*** Constant loads/M[] access ***/ - case BPF_LD | BPF_IMM: /* A = K */ - PPC_LI32(r_A, K); - break; - case BPF_LDX | BPF_IMM: /* X = K */ - PPC_LI32(r_X, K); - break; - case BPF_LD | BPF_MEM: /* A = mem[K] */ - EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LDX | BPF_MEM: /* X = mem[K] */ - EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_ST: /* mem[K] = A */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); - ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_STX: /* mem[K] = X */ - EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); - ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); - break; - case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); - break; - case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ - PPC_LWZ_OFFS(r_A, r_skb, K); - break; - case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ - PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); - break; - - /*** Ancillary info loads ***/ - case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, - protocol)); - break; - case BPF_ANC | SKF_AD_IFINDEX: - case BPF_ANC | SKF_AD_HATYPE: - BUILD_BUG_ON(sizeof_field(struct net_device, - ifindex) != 4); - BUILD_BUG_ON(sizeof_field(struct net_device, - type) != 2); - PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, - dev)); - EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); - if (ctx->pc_ret0 != -1) { - PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); - } else { - /* Exit, returning 0; first pass hits here. 
*/ - PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - EMIT(PPC_RAW_LI(r_ret, 0)); - PPC_JMP(exit_addr); - } - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - PPC_LWZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, ifindex)); - } else { - PPC_LHZ_OFFS(r_A, r_scratch1, - offsetof(struct net_device, type)); - } - - break; - case BPF_ANC | SKF_AD_MARK: - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - mark)); - break; - case BPF_ANC | SKF_AD_RXHASH: - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - hash)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2); - - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - vlan_tci)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); - if (PKT_VLAN_PRESENT_BIT) - EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); - if (PKT_VLAN_PRESENT_BIT < 7) - EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); - break; - case BPF_ANC | SKF_AD_QUEUE: - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, - queue_mapping)); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); - EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); - break; - case BPF_ANC | SKF_AD_CPU: - PPC_BPF_LOAD_CPU(r_A); - break; - /*** Absolute loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); - goto common_load; - case BPF_LD | BPF_H | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_LD | BPF_B | BPF_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - common_load: - /* Load from [K]. */ - ctx->seen |= SEEN_DATAREF; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - PPC_LI32(r_addr, K); - EMIT(PPC_RAW_BLRL()); - /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in r3 - */ - PPC_BCC(COND_LT, exit_addr); - break; - - /*** Indirect loads from packet header/data ***/ - case BPF_LD | BPF_W | BPF_IND: - func = sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = sk_load_byte; - common_load_ind: - /* - * Load from [X + K]. Negative offsets are tested for - * in the helper functions. 
- */ - ctx->seen |= SEEN_DATAREF | SEEN_XREG; - PPC_FUNC_ADDR(r_scratch1, func); - EMIT(PPC_RAW_MTLR(r_scratch1)); - EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); - if (K >= 32768) - EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); - EMIT(PPC_RAW_BLRL()); - /* If error, cr0.LT set */ - PPC_BCC(COND_LT, exit_addr); - break; - - case BPF_LDX | BPF_B | BPF_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - goto common_load; - break; - - /*** Jump and branches ***/ - case BPF_JMP | BPF_JA: - if (K != 0) - PPC_JMP(addrs[i + 1 + K]); - break; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGT | BPF_X: - true_cond = COND_GT; - goto cond_branch; - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JGE | BPF_X: - true_cond = COND_GE; - goto cond_branch; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JEQ | BPF_X: - true_cond = COND_EQ; - goto cond_branch; - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP | BPF_JSET | BPF_X: - true_cond = COND_NE; - cond_branch: - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - if (filter[i].jt > 0) - PPC_JMP(addrs[i + 1 + filter[i].jt]); - break; - } - - switch (code) { - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_CMPLW(r_A, r_X)); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->seen |= SEEN_XREG; - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - if (K < 32768) - EMIT(PPC_RAW_CMPLWI(r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - if (K < 32768) - /* PPC_ANDI is /only/ dot-form */ - EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); - else { - PPC_LI32(r_scratch1, K); - EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, - r_scratch1)); - } - break; - } - /* Sometimes branches are constructed "backward", with - * the false path being the branch and true path being - * a fallthrough to the next instruction. - */ - if (filter[i].jt == 0) - /* Swap the sense of the branch */ - PPC_BCC(true_cond ^ COND_CMP_TRUE, - addrs[i + 1 + filter[i].jf]); - else { - PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]); - if (filter[i].jf != 0) - PPC_JMP(addrs[i + 1 + filter[i].jf]); - } - break; - default: - /* The filter contains something cruel & unusual. - * We don't handle it, but also there shouldn't be - * anything missing from our list. - */ - if (printk_ratelimit()) - pr_err("BPF filter opcode %04x (@%d) unsupported\n", - filter[i].code, i); - return -ENOTSUPP; - } - - } - /* Set end-of-body-code address for exit. 
*/ - addrs[i] = ctx->idx * 4; return 0; } -void bpf_jit_compile(struct bpf_prog *fp) +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + +bool bpf_jit_needs_zext(void) { - unsigned int proglen; - unsigned int alloclen; - u32 *image = NULL; + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +{ + u32 proglen; + u32 alloclen; + u8 *image = NULL; u32 *code_base; - unsigned int *addrs; + u32 *addrs; + struct powerpc64_jit_data *jit_data; struct codegen_context cgctx; int pass; - int flen = fp->len; + int flen; + struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; - if (!bpf_jit_enable) - return; + if (!fp->jit_requested) + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } + + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } + + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return; + if (addrs == NULL) { + fp = org_fp; + goto out_addrs; + } + + memset(&cgctx, 0, sizeof(struct codegen_context)); + memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + + /* Make sure that the stack is quadword aligned. */ + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); + + /* Scouting faux-generate pass 0 */ + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + /* We hit something illegal or unsupported. */ + fp = org_fp; + goto out_addrs; + } /* - * There are multiple assembly passes as the generated code will change - * size as it settles down, figuring out the max branch offsets/exit - * paths required. - * - * The range of standard conditional branches is +/- 32Kbytes. Since - * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to - * finish with 8 bytes/instruction. Not feasible, so long jumps are - * used, distinct from short branches. - * - * Current: - * - * For now, both branch types assemble to 2 words (short branches padded - * with a NOP); this is less efficient, but assembly will always complete - * after exactly 3 passes: - * - * First pass: No code buffer; Program is "faux-generated" -- no code - * emitted but maximum size of output determined (and addrs[] filled - * in). Also, we note whether we use M[], whether we use skb data, etc. - * All generation choices assumed to be 'worst-case', e.g. branches all - * far (2 instructions), return path code reduction not available, etc. - * - * Second pass: Code buffer allocated with size determined previously. - * Prologue generated to support features we have seen used. Exit paths - * determined and addrs[] is filled in again, as code may be slightly - * smaller as a result. - * - * Third pass: Code generated 'for real', and branch destinations - * determined from now-accurate addrs[] map. - * - * Ideal: - * - * If we optimise this, near branches will be shorter. On the - * first assembly pass, we should err on the side of caution and - * generate the biggest code. 
On subsequent passes, branches will be - * generated short or long and code size will reduce. With smaller - * code, more branches may fall into the short category, and code will - * reduce more. - * - * Finally, if we see one pass generate code the same size as the - * previous pass we have converged and should now generate code for - * real. Allocating at the end will also save the memory that would - * otherwise be wasted by the (small) current code shrinkage. - * Preferably, we should do a small number of passes (e.g. 5) and if we - * haven't converged by then, get impatient and force code to generate - * as-is, even if the odd branch would be left long. The chances of a - * long jump are tiny with all but the most enormous of BPF filter - * inputs, so we should usually converge on the third pass. + * If we have seen a tail call, we need a second pass. + * This is because bpf_jit_emit_common_epilogue() is called + * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + fp = org_fp; + goto out_addrs; + } + } - cgctx.idx = 0; - cgctx.seen = 0; - cgctx.pc_ret0 = -1; - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) - /* We hit something illegal or unsupported. */ - goto out; - + bpf_jit_realloc_regs(&cgctx); /* * Pretend to build prologue, given the features we've seen. This will * update ctgtx.idx as it pretends to output instructions, then we can * calculate total size from idx. */ - bpf_jit_build_prologue(fp, 0, &cgctx); + bpf_jit_build_prologue(0, &cgctx); bpf_jit_build_epilogue(0, &cgctx); proglen = cgctx.idx * 4; alloclen = proglen + FUNCTION_DESCR_SIZE; - image = module_alloc(alloclen); - if (!image) - goto out; - code_base = image + (FUNCTION_DESCR_SIZE/4); + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { + fp = org_fp; + goto out_addrs; + } + +skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; - bpf_jit_build_prologue(fp, code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs); + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); bpf_jit_build_epilogue(code_base, &cgctx); if (bpf_jit_enable > 1) @@ -652,15 +218,15 @@ void bpf_jit_compile(struct bpf_prog *fp) proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) - /* Note that we output the base address of the code_base + /* + * Note that we output the base address of the code_base * rather than image, since opcodes are in code_base. 
*/ bpf_jit_dump(flen, proglen, pass, code_base); - bpf_flush_icache(code_base, code_base + (proglen/4)); - -#ifdef CONFIG_PPC64 +#ifdef PPC64_ELF_ABI_v1 /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; @@ -668,16 +234,38 @@ void bpf_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)image; fp->jited = 1; + fp->jited_len = alloclen; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { + bpf_prog_fill_jited_linfo(fp, addrs); +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } out: - kfree(addrs); - return; + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + + return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. */ void bpf_jit_free(struct bpf_prog *fp) { + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + if (fp->jited) - module_memfree(fp->bpf_func); + bpf_jit_binary_free(bpf_hdr); bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..bbb16099e8c7 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -0,0 +1,1100 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * eBPF JIT compiler for PPC32 + * + * Copyright 2020 Christophe Leroy + * CS GROUP France + * + * Based on PPC64 eBPF JIT compiler by Naveen N. Rao + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 16 * 4 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 16 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers r17 to r31 (14) + tail call */ +#define BPF_PPC_STACK_SAVE (15 * 4 + 4) +/* stack frame, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) + +/* BPF register usage */ +#define TMP_REG (MAX_BPF_JIT_REG + 0) + +/* BPF to ppc register mappings */ +const int b2p[MAX_BPF_JIT_REG + 1] = { + /* function return value */ + [BPF_REG_0] = 12, + /* function arguments */ + [BPF_REG_1] = 4, + [BPF_REG_2] = 6, + [BPF_REG_3] = 8, + [BPF_REG_4] = 10, + [BPF_REG_5] = 22, + /* non volatile registers */ + [BPF_REG_6] = 24, + [BPF_REG_7] = 26, + [BPF_REG_8] = 28, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 18, + /* eBPF jit internal registers */ + [BPF_REG_AX] = 20, + [TMP_REG] = 31, /* 32 bits */ +}; + +static int bpf_to_ppc(struct codegen_context *ctx, int reg) +{ + return ctx->b2p[reg]; +} + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN 17 +#define BPF_PPC_TC 16 + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) + return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg); + + WARN(true, "BPF JIT is asking about unknown registers, will crash the stack"); + /* Use the hole we have left for alignment */ + return BPF_PPC_STACKFRAME(ctx) - 4; +} + +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ + if (ctx->seen & SEEN_FUNC) + return; + + while (ctx->seen & 
SEEN_NVREG_MASK && + (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { + int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); + int i; + + for (i = BPF_REG_0; i <= TMP_REG; i++) { + if (ctx->b2p[i] != old) + continue; + ctx->b2p[i] = new; + bpf_set_seen_register(ctx, new); + bpf_clear_seen_register(ctx, old); + if (i != TMP_REG) { + bpf_set_seen_register(ctx, new - 1); + bpf_clear_seen_register(ctx, old - 1); + } + break; + } + } +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + + /* + * Initialize tail_call_cnt in stack frame if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. + */ + if (ctx->seen & SEEN_TAILCALL) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + } else { + EMIT(PPC_RAW_NOP()); + } + +#define BPF_TAILCALL_PROLOGUE_SIZE 16 + + /* + * We need a stack frame, but we don't necessarily need to + * save/restore LR unless we call other functions + */ + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MFLR(__REG_R0)); + + /* + * Back up non-volatile regs -- registers r18-r31 + */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + + /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + } + + /* Setup frame pointer to point to the bpf stack area */ + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); + } + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); +} + +static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) +{ + int i; + + /* Restore NVRs */ + for (i = BPF_PPC_NVR_MIN; i <= 31; i++) + if (bpf_is_seen_register(ctx, i)) + EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + + /* Tear down our stack frame */ + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_BLR()); +} + +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +{ + s32 rel = (s32)func - (s32)(image + ctx->idx); + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); + EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_BLRL()); + } +} + +static void 
bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +{ + /* + * By now, the eBPF program has already setup parameters in r3-r6 + * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program + * r5-r6/BPF_REG_2 - pointer to bpf_array + * r7-r8/BPF_REG_3 - index in bpf_array + */ + int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); + int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + PPC_BCC(COND_GE, out); + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + /* tail_call_cnt++; */ + EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + PPC_BCC(COND_GT, out); + + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + + /* + * if (prog == NULL) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(__REG_R0)); + + EMIT(PPC_RAW_MTCTR(__REG_R3)); + + EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + + /* tear restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + EMIT(PPC_RAW_BCTR()); + /* out: */ +} + +/* Assemble the body code between the prologue & epilogue */ +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg_h = dst_reg - 1; + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; + u32 true_cond; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. 
+ */ + if (dst_reg >= 3 && dst_reg < 32) { + bpf_set_seen_register(ctx, dst_reg); + bpf_set_seen_register(ctx, dst_reg_h); + } + + if (src_reg >= 3 && src_reg < 32) { + bpf_set_seen_register(ctx, src_reg); + bpf_set_seen_register(ctx, src_reg_h); + } + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDE(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, dst_reg)); + EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, dst_reg_h)); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + imm = -imm; + fallthrough; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + if (IMM_HA(imm) & 0xffff) + EMIT(PPC_RAW_ADDIS(dst_reg, dst_reg, IMM_HA(imm))); + if (IMM_L(imm)) + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + imm = -imm; + fallthrough; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) + break; + + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + } + if (imm >= 0) + EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); + else + EMIT(PPC_RAW_ADDME(dst_reg_h, dst_reg_h)); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (!imm) { + PPC_LI32(dst_reg, 0); + PPC_LI32(dst_reg_h, 0); + break; + } + if (imm == 1) + break; + if (imm == -1) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + } + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); + if (imm < 0) + EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); + EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + break; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + return -EOPNOTSUPP; + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src 
*/ + return -EOPNOTSUPP; + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + if (!imm) + return -EINVAL; + if (imm == 1) + break; + + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + if (!imm) + return -EINVAL; + + if (!is_power_of_2((u32)imm)) { + bpf_set_seen_register(ctx, tmp_reg); + PPC_LI32(tmp_reg, imm); + EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + break; + } + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2((u32)imm), 31)); + + break; + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + if (!imm) + return -EINVAL; + if (imm < 0) + imm = -imm; + if (!is_power_of_2(imm)) + return -EOPNOTSUPP; + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (!imm) + return -EINVAL; + if (!is_power_of_2(abs(imm))) + return -EOPNOTSUPP; + + if (imm < 0) { + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + imm = -imm; + } + if (imm == 1) + break; + imm = ilog2(imm); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + EMIT(PPC_RAW_SUBFIC(dst_reg, dst_reg, 0)); + EMIT(PPC_RAW_SUBFZE(dst_reg_h, dst_reg_h)); + break; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_AND(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (imm >= 0) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + fallthrough; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + if (!IMM_H(imm)) { + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); + } else if (!IMM_L(imm)) { + EMIT(PPC_RAW_ANDIS(dst_reg, dst_reg, IMM_H(imm))); + } else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(i) - 1)) - 1))) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, + 32 - fls(imm), 32 - ffs(imm))); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + } + break; + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + /* Sign-extended */ + if (imm < 0) + EMIT(PPC_RAW_LI(dst_reg_h, -1)); + fallthrough; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + if (dst_reg == src_reg) { + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } else { + 
EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_XOR(dst_reg_h, dst_reg_h, src_reg_h)); + } + break; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + if (dst_reg == src_reg) + EMIT(PPC_RAW_LI(dst_reg, 0)); + else + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0) + EMIT(PPC_RAW_NOR(dst_reg_h, dst_reg_h, dst_reg_h)); + fallthrough; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + if (IMM_L(imm)) + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); + if (IMM_H(imm)) + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); + break; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, imm, 0, 31 - imm)); + EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31)); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, imm, 0, 31 - imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg, imm, 0, 31 - imm)); + else + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_RLWINM(dst_reg_h, dst_reg_h, 32 - imm, imm, 31)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg_h, 64 - imm, imm - 32, 31)); + else + EMIT(PPC_RAW_LI(dst_reg, 0)); + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); + 
EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); + EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); + break; + case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ + if (!imm) + break; + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm < 0) + return -EINVAL; + if (!imm) + break; + if (imm < 32) { + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 32 - imm, imm, 31)); + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg_h, 32 - imm, 0, imm - 1)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, imm)); + break; + } + if (imm < 64) + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, imm - 32)); + else + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg_h, 31)); + EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg_h, 31)); + break; + + /* + * MOV + */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (dst_reg == src_reg) + break; + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h)); + break; + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + /* special mov32 for zext */ + if (imm == 1) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else if (dst_reg != src_reg) + EMIT(PPC_RAW_MR(dst_reg, src_reg)); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + PPC_EX32(dst_reg_h, imm); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + PPC_LI32(dst_reg, imm); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* Copy 16 bits to upper part */ + EMIT(PPC_RAW_RLWIMI(dst_reg, dst_reg, 16, 0, 15)); + /* Rotate 8 bits right & mask */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31)); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + break; + case 64: + bpf_set_seen_register(ctx, tmp_reg); + EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + /* Rotate 24 bits and insert byte 1 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + /* Rotate 24 bits and insert byte 3 */ + EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 32 bits */ + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 16, 31)); + break; + case 32: + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + 
PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + bpf_set_seen_register(ctx, tmp_reg); + /* Get offset into TMP_REG */ + EMIT(PPC_RAW_LI(tmp_reg, off)); + /* load value from memory into r0 */ + EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + /* add value from src_reg into this */ + EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + /* store result back */ + EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + break; + + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + return -EOPNOTSUPP; + + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); + PPC_LI32(dst_reg, (u32)insn[i].imm); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. 
+ */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper or bpf function + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + } + + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + true_cond = COND_LT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + true_cond = COND_LE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* fallthrough; */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + /* unsigned comparison */ + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + /* signed comparison */ + 
EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + /* signed comparison */ + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JSET | BPF_X: + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP32 | BPF_JSET | BPF_X: { + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + break; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmplwi + */ + if (imm >= 0 && imm < 32768) { + EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load ... but unsigned comparison */ + PPC_EX32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (imm >= 0 && imm < 65536) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); + } else { + /* sign-extending load */ + EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? 
-1 : 0)); + PPC_LI32(__REG_R0, imm); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpwi + */ + if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); + } else { + /* sign-extending load */ + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + if (imm < 0) { + EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); + PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); + } + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + case BPF_JMP32 | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= -32768 && imm < 32768) { + /* PPC_ANDI is _only/always_ dot-form */ + EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + } else { + PPC_LI32(__REG_R0, imm); + EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Tail call + */ + case BPF_JMP | BPF_TAIL_CALL: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i); + return -EOPNOTSUPP; + } + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext && + !insn_is_zext(&insn[i + 1])) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + } + + /* Set end-of-body-code address for exit. 
*/ + addrs[i] = ctx->idx * 4; + + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index aaf1a887f653..57a8c1153851 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -18,27 +18,6 @@ #include "bpf_jit64.h" -static void bpf_jit_fill_ill_insns(void *area, unsigned int size) -{ - memset32(area, BREAKPOINT_INSTRUCTION, size/4); -} - -static inline void bpf_flush_icache(void *start, void *end) -{ - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) -{ - return (ctx->seen & (1 << (31 - b2p[i]))); -} - -static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) -{ - ctx->seen |= (1 << (31 - b2p[i])); -} - static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { /* @@ -47,7 +26,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); } /* @@ -85,7 +64,11 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +void bpf_jit_realloc_regs(struct codegen_context *ctx) +{ +} + +void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -124,11 +107,11 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, BPF_REG_FP)) + if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -139,7 +122,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, i)) + if (bpf_is_seen_register(ctx, b2p[i])) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Tear down our stack frame */ @@ -152,7 +135,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx } } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { bpf_jit_emit_common_epilogue(image, ctx); @@ -187,8 +170,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, - u64 func) +void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -289,9 +271,8 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 } /* Assemble the body code between the prologue & epilogue */ -static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, - u32 *addrs, bool extra_pass) +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = 
fp->len; @@ -330,9 +311,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * any issues. */ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) - bpf_set_seen_register(ctx, insn[i].dst_reg); + bpf_set_seen_register(ctx, dst_reg); if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) - bpf_set_seen_register(ctx, insn[i].src_reg); + bpf_set_seen_register(ctx, src_reg); switch (code) { /* @@ -1026,249 +1007,3 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, return 0; } - -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) -{ - const struct bpf_insn *insn = fp->insnsi; - bool func_addr_fixed; - u64 func_addr; - u32 tmp_idx; - int i, ret; - - for (i = 0; i < fp->len; i++) { - /* - * During the extra pass, only the branch target addresses for - * the subprog calls need to be fixed. All other instructions - * can left untouched. - * - * The JITed image length does not change because we already - * ensure that the JITed instruction sequence for these calls - * are of fixed length by padding them with NOPs. - */ - if (insn[i].code == (BPF_JMP | BPF_CALL) && - insn[i].src_reg == BPF_PSEUDO_CALL) { - ret = bpf_jit_get_func_addr(fp, &insn[i], true, - &func_addr, - &func_addr_fixed); - if (ret < 0) - return ret; - - /* - * Save ctx->idx as this would currently point to the - * end of the JITed image and set it to the offset of - * the instruction sequence corresponding to the - * subprog call temporarily. - */ - tmp_idx = ctx->idx; - ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); - - /* - * Restore ctx->idx here. This is safe as the length - * of the JITed sequence remains unchanged. - */ - ctx->idx = tmp_idx; - } - } - - return 0; -} - -struct powerpc64_jit_data { - struct bpf_binary_header *header; - u32 *addrs; - u8 *image; - u32 proglen; - struct codegen_context ctx; -}; - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) -{ - u32 proglen; - u32 alloclen; - u8 *image = NULL; - u32 *code_base; - u32 *addrs; - struct powerpc64_jit_data *jit_data; - struct codegen_context cgctx; - int pass; - int flen; - struct bpf_binary_header *bpf_hdr; - struct bpf_prog *org_fp = fp; - struct bpf_prog *tmp_fp; - bool bpf_blinded = false; - bool extra_pass = false; - - if (!fp->jit_requested) - return org_fp; - - tmp_fp = bpf_jit_blind_constants(org_fp); - if (IS_ERR(tmp_fp)) - return org_fp; - - if (tmp_fp != org_fp) { - bpf_blinded = true; - fp = tmp_fp; - } - - jit_data = fp->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - fp = org_fp; - goto out; - } - fp->aux->jit_data = jit_data; - } - - flen = fp->len; - addrs = jit_data->addrs; - if (addrs) { - cgctx = jit_data->ctx; - image = jit_data->image; - bpf_hdr = jit_data->header; - proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; - extra_pass = true; - goto skip_init_ctx; - } - - addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) { - fp = org_fp; - goto out_addrs; - } - - memset(&cgctx, 0, sizeof(struct codegen_context)); - - /* Make sure that the stack is quadword aligned. */ - cgctx.stack_size = round_up(fp->aux->stack_depth, 16); - - /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - /* We hit something illegal or unsupported. 
*/ - fp = org_fp; - goto out_addrs; - } - - /* - * If we have seen a tail call, we need a second pass. - * This is because bpf_jit_emit_common_epilogue() is called - * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. - */ - if (cgctx.seen & SEEN_TAILCALL) { - cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { - fp = org_fp; - goto out_addrs; - } - } - - /* - * Pretend to build prologue, given the features we've seen. This will - * update ctgtx.idx as it pretends to output instructions, then we can - * calculate total size from idx. - */ - bpf_jit_build_prologue(0, &cgctx); - bpf_jit_build_epilogue(0, &cgctx); - - proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; - - bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, - bpf_jit_fill_ill_insns); - if (!bpf_hdr) { - fp = org_fp; - goto out_addrs; - } - -skip_init_ctx: - code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); - - if (extra_pass) { - /* - * Do not touch the prologue and epilogue as they will remain - * unchanged. Only fix the branch target address for subprog - * calls in the body. - * - * This does not change the offsets and lengths of the subprog - * call instruction sequences and hence, the size of the JITed - * image as well. - */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); - - /* There is no need to perform the usual passes. */ - goto skip_codegen_passes; - } - - /* Code generation passes 1-2 */ - for (pass = 1; pass < 3; pass++) { - /* Now build the prologue, body code & epilogue for real. */ - cgctx.idx = 0; - bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); - bpf_jit_build_epilogue(code_base, &cgctx); - - if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, - proglen - (cgctx.idx * 4), cgctx.seen); - } - -skip_codegen_passes: - if (bpf_jit_enable > 1) - /* - * Note that we output the base address of the code_base - * rather than image, since opcodes are in code_base. - */ - bpf_jit_dump(flen, proglen, pass, code_base); - -#ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; -#endif - - fp->bpf_func = (void *)image; - fp->jited = 1; - fp->jited_len = alloclen; - - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); - if (!fp->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(fp, addrs); -out_addrs: - kfree(addrs); - kfree(jit_data); - fp->aux->jit_data = NULL; - } else { - jit_data->addrs = addrs; - jit_data->ctx = cgctx; - jit_data->proglen = proglen; - jit_data->image = image; - jit_data->header = bpf_hdr; - } - -out: - if (bpf_blinded) - bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); - - return fp; -} - -/* Overriding bpf_jit_free() as we don't set images read-only. 
*/ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 766f064f00fb..16d4d1b6a1ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include "internal.h" @@ -168,7 +169,7 @@ static bool regs_use_siar(struct pt_regs *regs) * they have not been setup using perf_read_regs() and so regs->result * is something random. */ - return ((TRAP(regs) == 0xf00) && regs->result); + return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result); } /* @@ -347,7 +348,7 @@ static inline void perf_read_regs(struct pt_regs *regs) * hypervisor samples as well as samples in the kernel with * interrupts off hence the userspace check. */ - if (TRAP(regs) != 0xf00) + if (TRAP(regs) != INTERRUPT_PERFMON) use_siar = 0; else if ((ppmu->flags & PPMU_NO_SIAR)) use_siar = 0; @@ -1963,6 +1964,17 @@ static int power_pmu_event_init(struct perf_event *event) return -ENOENT; } + /* + * PMU config registers have fields that are + * reserved and some specific values for bit fields are reserved. + * For example, MMCRA[61:62] is Random Sampling Mode (SM) + * and the value 0b11 for this field is reserved. + * Check for invalid values in attr.config. + */ + if (ppmu->check_attr_config && + ppmu->check_attr_config(event)) + return -EINVAL; + event->hw.config_base = ev; event->hw.idx = 0; @@ -2206,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_data_src) ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); - if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && + if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE && ppmu->get_mem_weight) - ppmu->get_mem_weight(&data.weight.full); + ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type); if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index e5eb33255066..1816f560a465 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -226,14 +226,14 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; -DEFINE_PER_CPU(int, hv_24x7_txn_flags); -DEFINE_PER_CPU(int, hv_24x7_txn_err); +static DEFINE_PER_CPU(int, hv_24x7_txn_flags); +static DEFINE_PER_CPU(int, hv_24x7_txn_err); struct hv_24x7_hw { struct perf_event *events[255]; }; -DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); +static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); /* * request_buffer and result_buffer are not required to be 4k aligned, @@ -241,8 +241,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); * the simplest way to ensure that. 
*/ #define H24x7_DATA_BUFFER_SIZE 4096 -DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); -DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); +static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); static unsigned int max_num_requests(int interface_version) { diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e4f577da33d8..f92bf5f6b74f 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); PMU_FORMAT_ATTR(thresh_start, "config:36-39"); PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); -struct attribute *isa207_pmu_format_attr[] = { +static struct attribute *isa207_pmu_format_attr[] = { &format_attr_event.attr, &format_attr_pmcxsel.attr, &format_attr_mark.attr, @@ -275,17 +275,47 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, sier = mfspr(SPRN_SIER); val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT; - if (val == 1 || val == 2) { - idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; - sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) + return; - dsrc->val = isa207_find_source(idx, sub_idx); + idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT; + sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT; + + dsrc->val = isa207_find_source(idx, sub_idx); + if (val == 7) { + u64 mmcra; + u32 op_type; + + /* + * Type 0b111 denotes either a larx or stcx instruction. Use the + * MMCRA sampling bits [57:59] along with the type value + * to determine the exact instruction type. If the sampling + * criteria is neither load nor store, set the type to NA + * by default. + */ + mmcra = mfspr(SPRN_MMCRA); + + op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK; + switch (op_type) { + case 5: + dsrc->val |= P(OP, LOAD); + break; + case 7: + dsrc->val |= P(OP, STORE); + break; + default: + dsrc->val |= P(OP, NA); + break; + } + } else { dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE); } } -void isa207_get_mem_weight(u64 *weight) +void isa207_get_mem_weight(u64 *weight, u64 type) { + union perf_sample_weight *weight_fields; + u64 weight_lat; u64 mmcra = mfspr(SPRN_MMCRA); u64 exp = MMCRA_THR_CTR_EXP(mmcra); u64 mantissa = MMCRA_THR_CTR_MANT(mmcra); @@ -295,10 +325,31 @@ void isa207_get_mem_weight(u64 *weight) if (cpu_has_feature(CPU_FTR_ARCH_31)) mantissa = P10_MMCRA_THR_CTR_MANT(mmcra); - if (val == 0 || val == 7) - *weight = 0; + if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) + weight_lat = 0; else - *weight = mantissa << (2 * exp); + weight_lat = mantissa << (2 * exp); + + /* + * Use the 64 bit weight field (full) if the sample type is + * WEIGHT. + * + * If the sample type is WEIGHT_STRUCT: + * - store memory latency in the lower 32 bits. + * - For ISA v3.1, use the remaining two 16 bit fields of + * perf_sample_weight to store cycle counter values + * from sier2. 
+ */ + weight_fields = (union perf_sample_weight *)weight; + if (type & PERF_SAMPLE_WEIGHT) + weight_fields->full = weight_lat; + else { + weight_fields->var1_dw = (u32)weight_lat; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2)); + weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2)); + } + } } int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1) @@ -447,8 +498,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, * EBB events are pinned & exclusive, so this should never actually * hit, but we leave it as a fallback in case. */ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; + mask |= CNST_EBB_MASK; + value |= CNST_EBB_VAL(ebb); *maskp = mask; *valp = value; @@ -694,3 +745,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, return num_alt; } + +int isa3XX_check_attr_config(struct perf_event *ev) +{ + u64 val, sample_mode; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + sample_mode = val & 0x3; + + /* + * MMCRA[61:62] is Random Sampling Mode (SM). + * value of 0b11 is reserved. + */ + if (sample_mode == 0x3) + return -EINVAL; + + /* + * Check for all reserved value + * Source: Performance Monitoring Unit User Guide + */ + switch (val) { + case 0x5: + case 0x9: + case 0xD: + case 0x19: + case 0x1D: + case 0x1A: + case 0x1E: + return -EINVAL; + } + + /* + * MMCRA[48:51]/[52:55]) Threshold Start/Stop + * Events Selection. + * 0b11110000/0b00001111 is reserved. + */ + val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; + if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF)) + return -EINVAL; + + return 0; +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 1af0e8c97ac7..4a2cbc3dc047 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -220,6 +220,7 @@ /* Bits in MMCRA for PowerISA v2.07 */ #define MMCRA_SAMP_MODE_SHIFT 1 #define MMCRA_SAMP_ELIG_SHIFT 4 +#define MMCRA_SAMP_ELIG_MASK 7 #define MMCRA_THR_CTL_SHIFT 8 #define MMCRA_THR_SEL_SHIFT 16 #define MMCRA_THR_CMP_SHIFT 32 @@ -265,6 +266,10 @@ #define ISA207_SIER_DATA_SRC_SHIFT 53 #define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT) +/* Bits in SIER2/SIER3 for Power10 */ +#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful) +#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful) + #define P(a, b) PERF_MEM_S(a, b) #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) @@ -278,6 +283,8 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); -void isa207_get_mem_weight(u64 *weight); +void isa207_get_mem_weight(u64 *weight, u64 type); + +int isa3XX_check_attr_config(struct perf_event *ev); #endif diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h index e45dafe818ed..93be7197d250 100644 --- a/arch/powerpc/perf/power10-events-list.h +++ b/arch/powerpc/perf/power10-events-list.h @@ -75,5 +75,5 @@ EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); * thresh end (TE) */ -EVENT(MEM_LOADS, 0x34340401e0); -EVENT(MEM_STORES, 0x343c0401e0); +EVENT(MEM_LOADS, 0x35340401e0); +EVENT(MEM_STORES, 0x353c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c 
b/arch/powerpc/perf/power10-pmu.c index a901c1348cad..f9d64c63bb4a 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -106,6 +106,18 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power10_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0x10 || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); @@ -559,6 +571,7 @@ static struct power_pmu power10_pmu = { .attr_groups = power10_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power10_check_attr_config, }; int init_power10_pmu(void) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2a57e93a79dc..ff3382140d7e 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -151,6 +151,18 @@ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } +static int power9_check_attr_config(struct perf_event *ev) +{ + u64 val; + u64 event = ev->attr.config; + + val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val == 0xC || isa3XX_check_attr_config(ev)) + return -EINVAL; + + return 0; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -437,6 +449,7 @@ static struct power_pmu power9_pmu = { .attr_groups = power9_pmu_attr_groups, .bhrb_nr = 32, .capabilities = PERF_PMU_CAP_EXTENDED_REGS, + .check_attr_config = power9_check_attr_config, }; int init_power9_pmu(void) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 7d41e9264510..83975ef50975 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -5,7 +5,7 @@ config PPC_47x select MPIC help This option enables support for the 47x family of processors and is - not currently compatible with other 44x or 46x varients + not currently compatible with other 44x or 46x variants config BAMBOO bool "Bamboo" diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 11475c58ea43..afee8b1515a8 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -181,7 +181,7 @@ sram_code: udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ - addi r12, r13, r12 /* end */ + add r12, r13, r12 /* end */ 1: mftb r13 /* current */ cmp cr0, r13, r12 diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3ce907523b1e..e4b05667686e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -101,6 +101,8 @@ config PPC_BOOK3S_64 select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK select PPC_MM_SLICES + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP config PPC_BOOK3E_64 bool "Embedded processors" @@ -306,6 +308,7 @@ config PHYS_64BIT config ALTIVEC bool "AltiVec Support" depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 || (PPC_E500MC && PPC64) + select PPC_FPU help This option enables kernel support for the Altivec extensions to the PowerPC processor. 
The kernel currently supports saving and restoring @@ -363,8 +366,6 @@ config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 select ARCH_HAS_GIGANTIC_PAGE - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 2124831cf57c..fa08699aedeb 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -486,7 +486,8 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_size = size >> window->table.it_page_shift; window->table.it_ops = &cell_iommu_ops; - iommu_init_table(&window->table, iommu->nid, 0, 0); + if (!iommu_init_table(&window->table, iommu->nid, 0, 0)) + panic("Failed to initialize iommu table"); pr_debug("\tioid %d\n", window->ioid); pr_debug("\tblocksize %ld\n", window->table.it_blocksize); diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index abdef9bcf432..fe0d8797a00a 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,9 +35,9 @@ */ static void *spu_syscall_table[] = { +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) #define __SYSCALL(nr, entry) [nr] = entry, #include -#undef __SYSCALL }; long spu_sys_callback(struct spu_syscall_block *s) diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c index 8c421dc78b28..76e6256cb0a7 100644 --- a/arch/powerpc/platforms/chrp/pci.c +++ b/arch/powerpc/platforms/chrp/pci.c @@ -131,8 +131,7 @@ static struct pci_ops rtas_pci_ops = volatile struct Hydra __iomem *Hydra = NULL; -int __init -hydra_init(void) +static int __init hydra_init(void) { struct device_node *np; struct resource r; diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index c1920961f410..4c6d703a4284 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -71,11 +71,6 @@ config MPC10X_BRIDGE bool select PPC_INDIRECT_PCI -config MV64X60 - bool - select PPC_INDIRECT_PCI - select CHECK_CACHE_COHERENCY - config GAMECUBE_COMMON bool diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a20b9576de22..37875e478b3a 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -34,7 +34,7 @@ static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { - for (; node != 0;node = node->sibling) { + for (; node; node = node->sibling) { const int *bus_range; const unsigned int *class_code; int len; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index b500a6e47e6b..5be7242fbd86 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -146,7 +146,9 @@ static void iommu_table_iobmap_setup(void) */ iommu_table_iobmap.it_blocksize = 4; iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; - iommu_init_table(&iommu_table_iobmap, 0, 0, 0); + if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0)) + panic("Failed to initialize iommu table"); + pr_debug(" <- %s\n", __func__); } diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 019669eb21d2..71c1262589fe 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ 
b/arch/powerpc/platforms/powernv/memtrace.c @@ -46,10 +46,26 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); } +static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct memtrace_entry *ent = filp->private_data; + + if (ent->size < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} + static const struct file_operations memtrace_fops = { .llseek = default_llseek, .read = memtrace_read, .open = simple_open, + .mmap = memtrace_mmap, }; #define FLUSH_CHUNK_SIZE SZ_1G @@ -187,7 +203,7 @@ static int memtrace_init_debugfs(void) dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); ent->dir = dir; - debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); + debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops); debugfs_create_x64("start", 0400, dir, &ent->start); debugfs_create_x64("size", 0400, dir, &ent->size); } diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 0d9ba70f7251..5b9736bbc2aa 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -71,7 +71,7 @@ static LIST_HEAD(opalcore_list); static struct opalcore_config *oc_conf; static const struct opal_mpipl_fadump *opalc_metadata; static const struct opal_mpipl_fadump *opalc_cpu_metadata; -struct kobject *mpipl_kobj; +static struct kobject *mpipl_kobj; /* * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index deddaebf8c14..a191f4c60ce7 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -105,7 +105,6 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; pgprot_t page_prot; - int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", vma->vm_start, vma->vm_end, vma->vm_pgoff, @@ -121,10 +120,8 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); - rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, page_prot); - - return rc; } static bool opal_msg_queue_empty(void) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f0f901683a2f..66c3c3337334 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1762,7 +1762,8 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; - iommu_init_table(tbl, phb->hose->node, 0, 0); + if (!iommu_init_table(tbl, phb->hose->node, 0, 0)) + panic("Failed to initialize iommu table"); pe->dma_setup_done = true; return; @@ -1930,16 +1931,16 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift; res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; } - 
iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end); - rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + else + rc = -ENOMEM; if (rc) { - pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", - rc); + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc); iommu_tce_table_put(tbl); - return rc; + tbl = NULL; /* This clears iommu_table_base below */ } - if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index aadf932c4e61..a8db3f153063 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -157,7 +157,7 @@ static void __init pnv_check_guarded_cores(void) for_each_node_by_type(dn, "cpu") { if (of_property_match_string(dn, "status", "bad") >= 0) bad_count++; - }; + } if (bad_count) { printk(" _ _______________\n"); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 233503fcf8f0..3ac70790ec7a 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -329,6 +329,20 @@ int dlpar_release_drc(u32 drc_index) return 0; } +int dlpar_unisolate_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, + DR_ENTITY_SENSE, drc_index); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + + return 0; +} + int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 12cbffd3c2e3..7e970f81d8ff 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -47,9 +47,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); @@ -271,6 +268,19 @@ static int dlpar_offline_cpu(struct device_node *dn) if (!cpu_online(cpu)) break; + /* + * device_offline() will return -EBUSY (via cpu_down()) if there + * is only one CPU left. Check it here to fail earlier and with a + * more informative error message, while also retaining the + * cpu_add_remove_lock to be sure that no CPUs are being + * online/offlined during this check. + */ + if (num_online_cpus() == 1) { + pr_warn("Unable to remove last online CPU %pOFn\n", dn); + rc = -EBUSY; + goto out_unlock; + } + cpu_maps_update_done(); rc = device_offline(get_cpu_device(cpu)); if (rc) @@ -283,6 +293,7 @@ static int dlpar_offline_cpu(struct device_node *dn) thread); } } +out_unlock: cpu_maps_update_done(); out: @@ -802,8 +813,16 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_ACTION_REMOVE: if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) rc = dlpar_cpu_remove_by_count(count); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { rc = dlpar_cpu_remove_by_index(drc_index); + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the CPU removal failed. 
+ */ + if (rc) + dlpar_unisolate_drc(drc_index); + } else rc = -EINVAL; break; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 2c59b4986ea5..3a50612a78db 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,7 +26,7 @@ struct hcall_stats { }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) -DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); +static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); /* * Routines for displaying the statistics in debugfs diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 9fc5217f0c8e..0c55b991f665 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -638,7 +638,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, pci->phb->node, 0, 0); + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -720,7 +721,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - iommu_init_table(tbl, ppci->phb->node, 0, 0); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -749,7 +751,9 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - iommu_init_table(tbl, phb->node, 0, 0); + if (!iommu_init_table(tbl, phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + set_iommu_table_base(&dev->dev, tbl); return; } @@ -1099,6 +1103,33 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) ret); } +/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ +static int iommu_get_page_shift(u32 query_page_size) +{ + /* Supported IO page-sizes according to LoPAR */ + const int shift[] = { + __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), + __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G) + }; + + int i = ARRAY_SIZE(shift) - 1; + + /* + * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: + * - bit 31 means 4k pages are supported, + * - bit 30 means 64k pages are supported, and so on. + * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. + */ + for (; i >= 0 ; i--) { + if (query_page_size & (1 << i)) + return shift[i]; + } + + /* No valid page size found. 
*/ + return 0; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1206,13 +1237,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_failed; } } - if (query.page_size & 4) { - page_shift = 24; /* 16MB */ - } else if (query.page_size & 2) { - page_shift = 16; /* 64kB */ - } else if (query.page_size & 1) { - page_shift = 12; /* 4kB */ - } else { + + page_shift = iommu_get_page_shift(query.page_size); + if (!page_shift) { dev_dbg(&dev->dev, "no supported direct page size in mask %x", query.page_size); goto out_failed; @@ -1229,7 +1256,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (pmem_present) { if (query.largest_available_block >= (1ULL << (MAX_PHYSMEM_BITS - page_shift))) - len = MAX_PHYSMEM_BITS - page_shift; + len = MAX_PHYSMEM_BITS; else dev_info(&dev->dev, "Skipping ibm,pmemory"); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3805519a6469..1f3152ad7213 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -977,11 +977,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - flags = newpp & 7; + flags = newpp & (HPTE_R_PP | HPTE_R_N); if (mmu_has_feature(MMU_FTR_KERNEL_RO)) /* Move pp0 into bit 8 (IBM 55) */ flags |= (newpp & HPTE_R_PP0) >> 55; + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + lpar_rc = plpar_pte_protect(flags, slot, 0); BUG_ON(lpar_rc != H_SUCCESS); @@ -1630,7 +1632,7 @@ static int pseries_lpar_resize_hpt(unsigned long shift) } msleep(delay); rc = plpar_resize_hpt_prepare(0, shift); - }; + } switch (rc) { case H_SUCCESS: diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e278390ab28d..f71eac74ea92 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -537,6 +537,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_em_data(m); maxmem_data(m); + seq_printf(m, "security_flavor=%u\n", pseries_security_flavor); + return 0; } diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 835163f54244..ef26fe40efb0 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -93,6 +93,7 @@ struct papr_scm_priv { uint64_t block_size; int metadata_size; bool is_volatile; + bool hcall_flush_required; uint64_t bound_addr; @@ -117,6 +118,38 @@ struct papr_scm_priv { size_t stat_buffer_len; }; +static int papr_scm_pmem_flush(struct nd_region *nd_region, + struct bio *bio __maybe_unused) +{ + struct papr_scm_priv *p = nd_region_provider_data(nd_region); + unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; + long rc; + + dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); + + do { + rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); + token = ret_buf[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "flush error: %ld", rc); + rc = -EIO; + } else { + dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); + } + + return rc; +} + static LIST_HEAD(papr_nd_regions); static DEFINE_MUTEX(papr_ndr_lock); @@ 
-914,6 +947,15 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); + /* + * Check if the nvdimm is unarmed. No locking needed as we are still + * initializing. Ignore error encountered if any. + */ + __drc_pmem_query_health(p); + + if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { @@ -943,6 +985,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.num_mappings = 1; ndr_desc.nd_set = &p->nd_set; + if (p->hcall_flush_required) { + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + ndr_desc.flush = papr_scm_pmem_flush; + } + if (p->is_volatile) p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); else { @@ -1088,6 +1135,7 @@ static int papr_scm_probe(struct platform_device *pdev) p->block_size = block_size; p->blocks = blocks; p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); + p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); /* We just need to ensure that set cookies are unique across */ uuid_parse(uuid_str, (uuid_t *) uuid); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index f9ae17e8a0f4..a8f9140a24fa 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -50,6 +50,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -76,7 +77,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index e1dc5d3254df..439ac72c2470 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -139,7 +139,7 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return rc; } -const struct of_device_id drc_pmem_match[] = { +static const struct of_device_id drc_pmem_match[] = { { .type = "ibm,persistent-memory", }, {} }; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4fe48c04c6c2..1f051a786fb3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -43,9 +43,6 @@ extern void pSeries_final_fixup(void); /* Poweron flag used for enabling auto ups restart */ extern unsigned long rtas_poweron_auto; -/* Provided by HVC VIO */ -extern void hvc_vio_init_early(void); - /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); @@ -55,6 +52,7 @@ extern int dlpar_attach_node(struct device_node *, struct device_node *); extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); +extern int dlpar_unisolate_drc(u32 drc_index); void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); @@ -111,6 +109,7 @@ static 
inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index f8b390a9d9fb..9d4ef65da7f3 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -699,7 +699,7 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.error_type = MCE_ERROR_TYPE_DCACHE; break; case MC_ERROR_TYPE_I_CACHE: - mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; break; case MC_ERROR_TYPE_UNKNOWN: default: diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index 81343908ed33..f8f73b47b107 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 46e1540abc22..754e493b7c05 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include #include #include +#include #include "pseries.h" #include "../../../../drivers/pci/pci.h" @@ -85,6 +86,7 @@ EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ int ibm_nmi_interlock_token; +u32 pseries_security_flavor; static void pSeries_show_cpuinfo(struct seq_file *m) { @@ -534,9 +536,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) /* * The features below are enabled by default, so we instead look to see * if firmware has *disabled* them, and clear them if so. + * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if + * H_CPU_BEHAV_FAVOUR_SECURITY is. */ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 1; + else + pseries_security_flavor = 2; if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 9cb4fc839fd5..e00f3725ec96 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -1278,6 +1279,20 @@ static int vio_bus_remove(struct device *dev) return 0; } +static void vio_bus_shutdown(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv; + + if (dev->driver) { + viodrv = to_vio_driver(dev->driver); + if (viodrv->shutdown) + viodrv->shutdown(viodev); + else if (kexec_in_progress) + vio_bus_remove(dev); + } +} + /** * vio_register_driver: - Register a new vio driver * @viodrv: The vio_driver structure to be registered. 
@@ -1285,6 +1300,10 @@ static int vio_bus_remove(struct device *dev) int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) { + // vio_bus_type is only initialised for pseries + if (!machine_is(pseries)) + return -ENODEV; + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); /* fill in 'struct driver' fields */ @@ -1613,6 +1632,7 @@ struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, }; /** diff --git a/arch/powerpc/purgatory/trampoline_64.S b/arch/powerpc/purgatory/trampoline_64.S index d956b8a35fd1..b35837c13852 100644 --- a/arch/powerpc/purgatory/trampoline_64.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -12,7 +12,6 @@ #include #include - .machine ppc64 .balign 256 .globl purgatory_start purgatory_start: diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d98..1d33b7a5ea83 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -344,7 +344,8 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_ops = &iommu_dart_ops; - iommu_init_table(&iommu_table_dart, -1, 0, 0); + if (!iommu_init_table(&iommu_table_dart, -1, 0, 0)) + panic("Failed to initialize iommu table"); /* Reserve the last page of the DART to avoid possible prefetch * past the DART mapped area diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 040b9d01c079..69af73765783 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -455,7 +455,7 @@ static void setup_pci_atmu(struct pci_controller *hose) } } -static void __init setup_pci_cmd(struct pci_controller *hose) +static void setup_pci_cmd(struct pci_controller *hose) { u16 cmd; int cap_x; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 595310e056f4..50469700dec6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -63,8 +63,19 @@ static const struct xive_ops *xive_ops; static struct irq_domain *xive_irq_domain; #ifdef CONFIG_SMP -/* The IPIs all use the same logical irq number */ -static u32 xive_ipi_irq; +/* The IPIs use the same logical irq number when on the same chip */ +static struct xive_ipi_desc { + unsigned int irq; + char name[16]; +} *xive_ipis; + +/* + * Use early_cpu_to_node() for hot-plugged CPUs + */ +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu) +{ + return xive_ipis[early_cpu_to_node(cpu)].irq; +} #endif /* Xive state for each CPU */ @@ -253,17 +264,20 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? 
irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); int rc; u32 target; u8 prio; u32 lirq; - if (!is_xive_irq(chip)) - return -EINVAL; - rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc); @@ -273,6 +287,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); @@ -289,6 +306,20 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) return 0; } +void xmon_xive_get_irq_all(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + unsigned int hwirq = (unsigned int)irqd_to_hwirq(d); + + if (d->domain == xive_irq_domain) + xmon_xive_get_irq_config(hwirq, d); + } +} + #endif /* CONFIG_XMON */ static unsigned int xive_get_irq(void) @@ -1067,28 +1098,94 @@ static struct irq_chip xive_ipi_chip = { .irq_unmask = xive_ipi_do_nothing, }; -static void __init xive_request_ipi(void) +/* + * IPIs are marked per-cpu. We use separate HW interrupts under the + * hood but associated with the same "linux" interrupt + */ +struct xive_ipi_alloc_info { + irq_hw_number_t hwirq; +}; + +static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) { - unsigned int virq; + struct xive_ipi_alloc_info *info = arg; + int i; - /* - * Initialization failed, move on, we might manage to - * reach the point where we display our errors before - * the system falls appart - */ - if (!xive_irq_domain) - return; + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip, + domain->host_data, handle_percpu_irq, + NULL, NULL); + } + return 0; +} - /* Initialize it */ - virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ); - xive_ipi_irq = virq; +static const struct irq_domain_ops xive_ipi_irq_domain_ops = { + .alloc = xive_ipi_irq_domain_alloc, +}; - WARN_ON(request_irq(virq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); +static int __init xive_request_ipi(void) +{ + struct fwnode_handle *fwnode; + struct irq_domain *ipi_domain; + unsigned int node; + int ret = -ENOMEM; + + fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI"); + if (!fwnode) + goto out; + + ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids, + &xive_ipi_irq_domain_ops, NULL); + if (!ipi_domain) + goto out_free_fwnode; + + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL); + if (!xive_ipis) + goto out_free_domain; + + for_each_node(node) { + struct xive_ipi_desc *xid = &xive_ipis[node]; + struct xive_ipi_alloc_info info = { node }; + + /* Skip nodes without CPUs */ + if (cpumask_empty(cpumask_of_node(node))) + continue; + + /* + * Map one IPI interrupt per node for all cpus of that node. + * Since the HW interrupt number doesn't have any meaning, + * simply use the node number. 
+ */ + xid->irq = irq_domain_alloc_irqs(ipi_domain, 1, node, &info); + if (xid->irq < 0) { + ret = xid->irq; + goto out_free_xive_ipis; + } + + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + } + + return ret; + +out_free_xive_ipis: + kfree(xive_ipis); +out_free_domain: + irq_domain_remove(ipi_domain); +out_free_fwnode: + irq_domain_free_fwnode(fwnode); +out: + return ret; } static int xive_setup_cpu_ipi(unsigned int cpu) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); struct xive_cpu *xc; int rc; @@ -1131,6 +1228,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) { + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); + /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ @@ -1178,19 +1277,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, */ irq_clear_status_flags(virq, IRQ_LEVEL); -#ifdef CONFIG_SMP - /* IPIs are special and come up with HW number 0 */ - if (hw == XIVE_IPI_HW_IRQ) { - /* - * IPIs are marked per-cpu. We use separate HW interrupts under - * the hood but associated with the same "linux" interrupt - */ - irq_set_chip_and_handler(virq, &xive_ipi_chip, - handle_percpu_irq); - return 0; - } -#endif - rc = xive_irq_alloc_data(virq, hw); if (rc) return rc; @@ -1202,15 +1288,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq, static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq) { - struct irq_data *data = irq_get_irq_data(virq); - unsigned int hw_irq; - - /* XXX Assign BAD number */ - if (!data) - return; - hw_irq = (unsigned int)irqd_to_hwirq(data); - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_irq_free_data(virq); + xive_irq_free_data(virq); } static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct, @@ -1335,17 +1413,14 @@ static int xive_prepare_cpu(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); if (!xc) { - struct device_node *np; - xc = kzalloc_node(sizeof(struct xive_cpu), GFP_KERNEL, cpu_to_node(cpu)); if (!xc) return -ENOMEM; - np = of_get_cpu_node(cpu, NULL); - if (np) - xc->chip_id = of_get_ibm_chip_id(np); - of_node_put(np); xc->hw_ipi = XIVE_BAD_IRQ; + xc->chip_id = XIVE_INVALID_CHIP_ID; + if (xive_ops->prepare_cpu) + xive_ops->prepare_cpu(cpu, xc); per_cpu(xive_cpu, cpu) = xc; } @@ -1408,13 +1483,12 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) struct irq_desc *desc = irq_to_desc(irq); struct irq_data *d = irq_desc_get_irq_data(desc); struct xive_irq_data *xd; - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); /* * Ignore anything that isn't a XIVE irq and ignore * IPIs, so can just be dropped. 
*/ - if (d->domain != xive_irq_domain || hw_irq == XIVE_IPI_HW_IRQ) + if (d->domain != xive_irq_domain) continue; /* @@ -1592,16 +1666,15 @@ static void xive_debug_show_cpu(struct seq_file *m, int cpu) seq_puts(m, "\n"); } -static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d) +static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) { - struct irq_chip *chip = irq_data_get_irq_chip(d); + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); int rc; u32 target; u8 prio; u32 lirq; - - if (!is_xive_irq(chip)) - return; + struct xive_irq_data *xd; + u64 val; rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { @@ -1612,17 +1685,14 @@ static void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); - if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); - - seq_printf(m, "flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); - } + xd = irq_data_get_irq_handler_data(d); + val = xive_esb_read(xd, XIVE_ESB_GET); + seq_printf(m, "flags=%c%c%c PQ=%c%c", + xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-'); seq_puts(m, "\n"); } @@ -1640,16 +1710,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private) for_each_irq_desc(i, desc) { struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hw_irq; - if (!d) - continue; - - hw_irq = (unsigned int)irqd_to_hwirq(d); - - /* IPIs are special (HW number 0) */ - if (hw_irq != XIVE_IPI_HW_IRQ) - xive_debug_show_irq(m, hw_irq, d); + if (d->domain == xive_irq_domain) + xive_debug_show_irq(m, d); } return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 05a800a3104e..57e3f1540435 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -380,6 +380,11 @@ static void xive_native_update_pending(struct xive_cpu *xc) } } +static void xive_native_prepare_cpu(unsigned int cpu, struct xive_cpu *xc) +{ + xc->chip_id = cpu_to_chip_id(cpu); +} + static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc) { s64 rc; @@ -462,6 +467,7 @@ static const struct xive_ops xive_native_ops = { .match = xive_native_match, .shutdown = xive_native_shutdown, .update_pending = xive_native_update_pending, + .prepare_cpu = xive_native_prepare_cpu, .setup_cpu = xive_native_setup_cpu, .teardown_cpu = xive_native_teardown_cpu, .sync_source = xive_native_sync_source, diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 01ccc0786ada..f143b6f111ac 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -549,7 +549,7 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, static bool xive_spapr_match(struct device_node *node) { /* Ignore cascaded controllers for the moment */ - return 1; + return true; } #ifdef CONFIG_SMP diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 9cf57c722faa..504e7edce358 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ 
b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,8 +5,6 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */ - /* * A "disabled" interrupt should never fire, to catch problems * we set its logical number to this @@ -46,6 +44,7 @@ struct xive_ops { u32 *sw_irq); int (*setup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); void (*cleanup_queue)(unsigned int cpu, struct xive_cpu *xc, u8 prio); + void (*prepare_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*setup_cpu)(unsigned int cpu, struct xive_cpu *xc); void (*teardown_cpu)(unsigned int cpu, struct xive_cpu *xc); bool (*match)(struct device_node *np); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bf7d69625a2e..c8173e92f19d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -54,6 +54,7 @@ #include #include #include +#include #ifdef CONFIG_PPC64 #include @@ -605,7 +606,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) * debugger break (IPI). This is similar to * crash_kexec_secondary(). */ - if (TRAP(regs) != 0x100 || !wait_for_other_cpus(ncpus)) + if (TRAP(regs) != INTERRUPT_SYSTEM_RESET || !wait_for_other_cpus(ncpus)) smp_send_debugger_break(); wait_for_other_cpus(ncpus); @@ -615,7 +616,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!locked_down) { /* for breakpoint or single step, print curr insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -684,7 +685,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) disable_surveillance(); if (!locked_down) { /* for breakpoint or single step, print current insn */ - if (bp || TRAP(regs) == 0xd00) + if (bp || TRAP(regs) == INTERRUPT_TRACE) ppc_inst_dump(regs->nip, 1, 0); printf("enter ? for help\n"); } @@ -1769,9 +1770,12 @@ static void excprint(struct pt_regs *fp) printf(" sp: %lx\n", fp->gpr[1]); printf(" msr: %lx\n", fp->msr); - if (trap == 0x300 || trap == 0x380 || trap == 0x600 || trap == 0x200) { + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT || + trap == INTERRUPT_MACHINE_CHECK) { printf(" dar: %lx\n", fp->dar); - if (trap != 0x380) + if (trap != INTERRUPT_DATA_SEGMENT) printf(" dsisr: %lx\n", fp->dsisr); } @@ -1785,7 +1789,7 @@ static void excprint(struct pt_regs *fp) current->pid, current->comm); } - if (trap == 0x700) + if (trap == INTERRUPT_PROGRAM) print_bug_trap(fp); printf(linux_banner); @@ -1815,25 +1819,16 @@ static void prregs(struct pt_regs *fp) } #ifdef CONFIG_PPC64 - if (FULL_REGS(fp)) { - for (n = 0; n < 16; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+16, fp->gpr[n+16]); - } else { - for (n = 0; n < 7; ++n) - printf("R%.2d = "REG" R%.2d = "REG"\n", - n, fp->gpr[n], n+7, fp->gpr[n+7]); - } +#define R_PER_LINE 2 #else - for (n = 0; n < 32; ++n) { - printf("R%.2d = %.8lx%s", n, fp->gpr[n], - (n & 3) == 3? "\n": " "); - if (n == 12 && !FULL_REGS(fp)) { - printf("\n"); - break; - } - } +#define R_PER_LINE 4 #endif + + for (n = 0; n < 32; ++n) { + printf("R%.2d = "REG"%s", n, fp->gpr[n], + (n % R_PER_LINE) == R_PER_LINE - 1 ? 
"\n" : " "); + } + printf("pc = "); xmon_print_symbol(fp->nip, " ", "\n"); if (!trap_is_syscall(fp) && cpu_has_feature(CPU_FTR_CFAR)) { @@ -1846,7 +1841,9 @@ static void prregs(struct pt_regs *fp) printf("ctr = "REG" xer = "REG" trap = %4lx\n", fp->ctr, fp->xer, fp->trap); trap = TRAP(fp); - if (trap == 0x300 || trap == 0x380 || trap == 0x600) + if (trap == INTERRUPT_DATA_STORAGE || + trap == INTERRUPT_DATA_SEGMENT || + trap == INTERRUPT_ALIGNMENT) printf("dar = "REG" dsisr = %.8lx\n", fp->dar, fp->dsisr); } @@ -2727,30 +2724,6 @@ static void dump_all_xives(void) dump_one_xive(cpu); } -static void dump_one_xive_irq(u32 num, struct irq_data *d) -{ - xmon_xive_get_irq_config(num, d); -} - -static void dump_all_xive_irq(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hwirq; - - if (!d) - continue; - - hwirq = (unsigned int)irqd_to_hwirq(d); - /* IPIs are special (HW number 0) */ - if (hwirq) - dump_one_xive_irq(hwirq, d); - } -} - static void dump_xives(void) { unsigned long num; @@ -2767,9 +2740,9 @@ static void dump_xives(void) return; } else if (c == 'i') { if (scanhex(&num)) - dump_one_xive_irq(num, NULL); + xmon_xive_get_irq_config(num, NULL); else - dump_all_xive_irq(); + xmon_xive_get_irq_all(); return; } @@ -2980,7 +2953,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, if (!ppc_inst_prefixed(inst)) dump_func(ppc_inst_val(inst), adr); else - dump_func(ppc_inst_as_u64(inst), adr); + dump_func(ppc_inst_as_ulong(inst), adr); printf("\n"); } return adr - first_adr; @@ -4212,8 +4185,7 @@ static void dump_spu_fields(struct spu *spu) DUMP_FIELD(spu, "0x%p", pdata); } -int -spu_inst_dump(unsigned long adr, long count, int praddr) +static int spu_inst_dump(unsigned long adr, long count, int praddr) { return generic_inst_dump(adr, count, praddr, print_insn_spu); } diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index ed89ef742530..383c53c3dddd 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -68,7 +68,8 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) } #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return _timens_data; } diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index df01d7349d79..1936f21ed8cd 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page #endif #ifdef CONFIG_TIME_NS -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return __timens_vdso_data; } diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 3eec59f1fed3..543a05ed0c82 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -776,7 +776,7 @@ config I2C_MT7621 config I2C_MV64XXX tristate "Marvell mv64xxx I2C Controller" - depends on MV64X60 || PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST + depends on PLAT_ORION || ARCH_SUNXI || ARCH_MVEBU || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Marvell 64xxx line 
of host bridges. diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 73e6ae88fafd..4bdd4c45e7a7 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -180,14 +180,13 @@ static struct proc_dir_entry *proc_pmu_options; static int option_server_mode; int pmu_battery_count; -int pmu_cur_battery; +static int pmu_cur_battery; unsigned int pmu_power_flags = PMU_PWR_AC_PRESENT; struct pmu_battery_info pmu_batteries[PMU_MAX_BATTERIES]; static int query_batt_timer = BATTERY_POLLING_COUNT; static struct adb_request batt_req; static struct proc_dir_entry *proc_pmu_batt[PMU_MAX_BATTERIES]; -int __fake_sleep; int asleep; #ifdef CONFIG_ADB @@ -1833,6 +1832,7 @@ pmu_present(void) */ static u32 save_via[8]; +static int __fake_sleep; static void save_via_state(void) diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index 77612303841e..07f91ec1f960 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -56,7 +56,7 @@ static BLOCKING_NOTIFIER_HEAD(wf_client_list); static int wf_client_count; static unsigned int wf_overtemp; static unsigned int wf_overtemp_counter; -struct task_struct *wf_thread; +static struct task_struct *wf_thread; static struct platform_device wf_platform_device = { .name = "windfarm", diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c index ab467b9c31be..ba1ec6fc11d2 100644 --- a/drivers/macintosh/windfarm_pm121.c +++ b/drivers/macintosh/windfarm_pm121.c @@ -433,7 +433,7 @@ struct pm121_sys_state { struct wf_pid_state pid; }; -struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; +static struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; /* * ****** CPU Fans Control Loop ****** diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 79cb1ad09bfd..75966052819a 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -94,7 +94,7 @@ static int smu_set_fan(int pwm, u8 id, u16 value) return rc; wait_for_completion(&comp); - /* Handle fallback (see coment above) */ + /* Handle fallback (see comment above) */ if (cmd.status != 0 && smu_supports_new_fans_ops) { printk(KERN_WARNING "windfarm: SMU failed new fan command " "falling back to old method\n"); diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..acac0b571df1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -467,6 +467,34 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsafe_put_user(__s->sig[0], &__c->sig[0], label); \ } \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + compat_sigset_word hi, lo; \ + sigset_t *__s = set; \ + \ + switch (_NSIG_WORDS) { \ + case 4: \ + unsafe_get_user(lo, &__c->sig[7], label); \ + unsafe_get_user(hi, &__c->sig[6], label); \ + __s->sig[3] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 3: \ + unsafe_get_user(lo, &__c->sig[5], label); \ + unsafe_get_user(hi, &__c->sig[4], label); \ + __s->sig[2] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 2: \ + unsafe_get_user(lo, &__c->sig[3], label); \ + unsafe_get_user(hi, &__c->sig[2], label); \ + __s->sig[1] = hi | (((long)lo) << 32); \ + fallthrough; \ + case 1: \ + unsafe_get_user(lo, &__c->sig[1], label); \ + unsafe_get_user(hi, &__c->sig[0], label); \ + __s->sig[0] = hi | (((long)lo) << 32); \ + } \ +} while (0) #else #define 
unsafe_put_compat_sigset(compat, set, label) do { \ compat_sigset_t __user *__c = compat; \ @@ -474,6 +502,13 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, \ unsafe_copy_to_user(__c, __s, sizeof(*__c), label); \ } while (0) + +#define unsafe_get_compat_sigset(set, compat, label) do { \ + const compat_sigset_t __user *__c = compat; \ + sigset_t *__s = set; \ + \ + unsafe_copy_from_user(__s, __c, sizeof(*__c), label); \ +} while (0) #endif extern int compat_ptrace_request(struct task_struct *child, diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index c7c6e8b8344d..c05e903cef02 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -397,6 +397,7 @@ long strnlen_user_nofault(const void __user *unsafe_addr, long count); #define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e) #define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e) #define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e) +#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } #endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 2919f1698140..ce2f69552003 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -46,16 +46,18 @@ static inline bool vdso_cycles_ok(u64 cycles) #endif #ifdef CONFIG_TIME_NS -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd; const struct timens_offset *offs = &vdns->offset[clk]; const struct vdso_timestamp *vdso_ts; u64 cycles, last, ns; u32 seq; s64 sec; + vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? 
CS_RAW : CS_HRES_COARSE); + vd = __arch_get_timens_vdso_data(vd); if (clk != CLOCK_MONOTONIC_RAW) vd = &vd[CS_HRES_COARSE]; else @@ -92,13 +94,14 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, return 0; } #else -static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { return NULL; } -static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -EINVAL; } @@ -159,10 +162,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, } #ifdef CONFIG_TIME_NS -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(); + const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns); const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; const struct timens_offset *offs = &vdns->offset[clk]; u64 nsec; @@ -188,8 +191,8 @@ static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, return 0; } #else -static int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, + struct __kernel_timespec *ts) { return -1; } @@ -310,7 +313,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd, if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; @@ -333,7 +336,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -363,7 +366,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(); + vd = __arch_get_timens_vdso_data(vd); /* * Convert the clockid to a bitmask and use it to check which diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index c25cf7cd45e9..33ee34fc0828 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -10,16 +10,7 @@ * * We create two sets of source and destination buffers, one in regular memory, * the other cache-inhibited (by default we use /dev/fb0 for this, but an - * alterative path for cache-inhibited memory may be provided). - * - * One way to get cache-inhibited memory is to use the "mem" kernel parameter - * to limit the kernel to less memory than actually exists. Addresses above - * the limit may still be accessed but will be treated as cache-inhibited. 
For - * example, if there is actually 4GB of memory and the parameter "mem=3GB" is - * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. - * To access this region /dev/mem is used. The kernel should be configured - * without CONFIG_STRICT_DEVMEM. In this case use: - * ./alignment_handler /dev/mem 0xc0000000 + * alternative path for cache-inhibited memory may be provided, e.g. memtrace). * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index defe488d6bf1..40253abc6208 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst +TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh new file mode 100755 index 000000000000..e454509659f6 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +TIMEOUT=30 + +DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'` +if [ ! -e "$DEBUFS_DIR" ] +then + echo "debugfs not found, skipping" 1>&2 + exit 4 +fi + +if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ] +then + echo "Tracing files not found, skipping" 1>&2 + exit 4 +fi + + +echo "Testing for spurious faults when mapping kernel memory..." + +if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace" +then + echo "FAILED: Ftrace already dead."
Probably due to a spurious fault" 1>&2 + exit 1 +fi + +dmesg -C +START_TIME=`date +%s` +END_TIME=`expr $START_TIME + $TIMEOUT` +while [ `date +%s` -lt $END_TIME ] +do + echo function > $DEBUFS_DIR/tracing/current_tracer + echo nop > $DEBUFS_DIR/tracing/current_tracer + if dmesg | grep -q 'ftrace bug' + then + break + fi +done + +echo nop > $DEBUFS_DIR/tracing/current_tracer +if dmesg | grep -q 'ftrace bug' +then + echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2 + exit 1 +else + echo "OK: Mapping kernel memory does not cause spurious faults" + exit 0 +fi diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index 02dffb65de48..b099753b50e4 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -324,7 +324,7 @@ int compress_file(int argc, char **argv, void *handle) fprintf(stderr, "error: cannot progress; "); fprintf(stderr, "too many faults\n"); exit(-1); - }; + } } fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 0e96150b7c7e..eb75e5360e31 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -14,3 +14,4 @@ perf-hwbreak core-pkey ptrace-pkey ptrace-syscall +ptrace-perf-hwbreak diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 8d3f006c98cc..a500639da97a 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -2,7 +2,7 @@ TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall + perf-hwbreak ptrace-syscall ptrace-perf-hwbreak top_srcdir = ../../../../.. 
include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index c1f324afdbf3..ecde2c199f3b 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -21,8 +21,13 @@ #include #include #include +#include #include #include +#include +#include +#include +#include #include #include #include @@ -30,32 +35,130 @@ #include #include "utils.h" +#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 +#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 +#endif + #define MAX_LOOPS 10000 #define DAWR_LENGTH_MAX ((0x3f + 1) * 8) -static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, - int cpu, int group_fd, - unsigned long flags) +int nprocs; + +static volatile int a = 10; +static volatile int b = 10; +static volatile char c[512 + 8] __attribute__((aligned(512))); + +static void perf_event_attr_set(struct perf_event_attr *attr, + __u32 type, __u64 addr, __u64 len, + bool exclude_user) { - attr->size = sizeof(*attr); - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = type; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_guest = 1; + attr->exclude_user = exclude_user; + attr->disabled = 1; +} + +static int +perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, exclude_user); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_process_event_open(__u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); +} + +static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_event_attr_set(&attr, type, addr, len, 0); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static void close_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + close(fd[i]); +} + +static unsigned long read_fds(int *fd, int n) +{ + int i; + unsigned long c = 0; + unsigned long count = 0; + size_t res; + + for (i = 0; i < n; i++) { + res = read(fd[i], &c, sizeof(c)); + assert(res == sizeof(unsigned long long)); + count += c; + } + return count; +} + +static void reset_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_RESET); +} + +static void enable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_ENABLE); +} + +static void disable_fds(int *fd, int n) +{ + int i; + + for (i = 0; i < n; i++) + ioctl(fd[i], PERF_EVENT_IOC_DISABLE); +} + +static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len) +{ + int i = 0; + + /* Assume online processors are 0 to nprocs for simplisity */ + for (i = 0; i < nprocs; i++) { + fd[i] = perf_cpu_event_open(i, type, addr, len); + if (fd[i] < 0) { + close_fds(fd, i); + return fd[i]; + } + } + return 0; } static inline bool breakpoint_test(int len) { - struct perf_event_attr attr; int fd; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = HW_BREAKPOINT_R; /* bp_addr can 
point anywhere but needs to be aligned */ - attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800; - attr.bp_len = len; - fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len); if (fd < 0) return false; close(fd); @@ -75,7 +178,6 @@ static inline bool dawr_supported(void) static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) { int i,j; - struct perf_event_attr attr; size_t res; unsigned long long breaks, needed; int readint; @@ -85,6 +187,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) int break_fd; int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */ volatile int *k; + __u64 len; /* align to 0x400 boundary as required by DAWR */ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) & @@ -94,19 +197,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) if (arraytest) ptr = &readintalign[0]; - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.bp_type = readwriteflag; - attr.bp_addr = (__u64)ptr; - attr.bp_len = sizeof(int); - if (arraytest) - attr.bp_len = DAWR_LENGTH_MAX; - attr.exclude_user = exclude_user; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + len = arraytest ? DAWR_LENGTH_MAX : sizeof(int); + break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr, + len, exclude_user); if (break_fd < 0) { - perror("sys_perf_event_open"); + perror("perf_process_event_open_exclude_user"); exit(1); } @@ -153,7 +248,6 @@ static int runtest_dar_outside(void) void *target; volatile __u16 temp16; volatile __u64 temp64; - struct perf_event_attr attr; int break_fd; unsigned long long breaks; int fail = 0; @@ -165,21 +259,11 @@ static int runtest_dar_outside(void) exit(EXIT_FAILURE); } - /* setup counters */ - memset(&attr, 0, sizeof(attr)); - attr.disabled = 1; - attr.type = PERF_TYPE_BREAKPOINT; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_guest = 1; - attr.bp_type = HW_BREAKPOINT_RW; /* watch middle half of target array */ - attr.bp_addr = (__u64)(target + 2); - attr.bp_len = 4; - break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4); if (break_fd < 0) { free(target); - perror("sys_perf_event_open"); + perror("perf_process_event_open"); exit(EXIT_FAILURE); } @@ -263,11 +347,467 @@ static int runtest_dar_outside(void) return fail; } +static void multi_dawr_workload(void) +{ + a += 10; + b += 10; + c[512 + 1] += 'a'; +} + +static int test_process_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + 
assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, diff addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); + assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_process_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int fd1, fd2; + char *desc = "Process specific, Two events, same addr, one is RO, other is WO"; + size_t res; + + fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (fd1 < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (fd2 < 0) { + close(fd1); + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd1, PERF_EVENT_IOC_RESET); + ioctl(fd2, PERF_EVENT_IOC_RESET); + ioctl(fd1, PERF_EVENT_IOC_ENABLE); + ioctl(fd2, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd1, PERF_EVENT_IOC_DISABLE); + ioctl(fd2, PERF_EVENT_IOC_DISABLE); + + res = read(fd1, &breaks1, sizeof(breaks1)); + assert(res == sizeof(unsigned long long)); + res = read(fd2, &breaks2, sizeof(breaks2)); 
+ assert(res == sizeof(unsigned long long)); + + close(fd1); + close(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 2 || breaks2 != 2) { + printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_diff_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + 
printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int test_syswide_multi_same_addr_ro_wo(void) +{ + unsigned long long breaks1 = 0, breaks2 = 0; + int *fd1 = malloc(nprocs * sizeof(int)); + int *fd2 = malloc(nprocs * sizeof(int)); + char *desc = "Systemwide, Two events, same addr, one is RO, other is WO"; + int ret; + + ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a)); + if (ret) { + close_fds(fd1, nprocs); + perror("perf_systemwide_event_open"); + exit(EXIT_FAILURE); + } + + reset_fds(fd1, nprocs); + reset_fds(fd2, nprocs); + enable_fds(fd1, nprocs); + enable_fds(fd2, nprocs); + multi_dawr_workload(); + disable_fds(fd1, nprocs); + disable_fds(fd2, nprocs); + + breaks1 = read_fds(fd1, nprocs); + breaks2 = read_fds(fd2, nprocs); + + close_fds(fd1, nprocs); + close_fds(fd2, nprocs); + + free(fd1); + free(fd2); + + if (breaks1 != 1 || breaks2 != 1) { + printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +static int runtest_multi_dawr(void) +{ + int ret = 0; + + ret |= test_process_multi_diff_addr(); + ret |= test_process_multi_same_addr(); + ret |= test_process_multi_diff_addr_ro_wo(); + ret |= test_process_multi_same_addr_ro_wo(); + ret |= test_syswide_multi_diff_addr(); + ret |= test_syswide_multi_same_addr(); + ret |= test_syswide_multi_diff_addr_ro_wo(); + ret |= test_syswide_multi_same_addr_ro_wo(); + + return ret; +} + +static int runtest_unaligned_512bytes(void) +{ + unsigned long long breaks = 0; + int fd; + char *desc = "Process specific, 512 bytes, unaligned"; + __u64 addr = (__u64)&c + 8; + size_t res; + + fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512); + if (fd < 0) { + perror("perf_process_event_open"); + exit(EXIT_FAILURE); + } + + ioctl(fd, PERF_EVENT_IOC_RESET); + ioctl(fd, PERF_EVENT_IOC_ENABLE); + multi_dawr_workload(); + ioctl(fd, PERF_EVENT_IOC_DISABLE); + + res = read(fd, &breaks, sizeof(breaks)); + assert(res == sizeof(unsigned long long)); + + close(fd); + + if (breaks != 2) { + printf("FAILED: %s: %lld != 2\n", desc, breaks); + return 1; + } + + printf("TESTED: %s\n", desc); + return 0; +} + +/* There is no perf api to find number of available watchpoints. Use ptrace. 
*/ +static int get_nr_wps(bool *arch_31) +{ + struct ppc_debug_info dbginfo; + int child_pid; + + child_pid = fork(); + if (!child_pid) { + int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + perror("PTRACE_TRACEME failed\n"); + exit(EXIT_FAILURE); + } + kill(getpid(), SIGUSR1); + + sleep(1); + exit(EXIT_SUCCESS); + } + + wait(NULL); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) { + perror("Can't get breakpoint info"); + exit(EXIT_FAILURE); + } + + *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31); + return dbginfo.num_data_bps; +} + static int runtest(void) { int rwflag; int exclude_user; int ret; + bool dawr = dawr_supported(); + bool arch_31 = false; + int nr_wps = get_nr_wps(&arch_31); /* * perf defines rwflag as two bits read and write and at least @@ -280,7 +820,7 @@ static int runtest(void) return ret; /* if we have the dawr, we can do an array test */ - if (!dawr_supported()) + if (!dawr) continue; ret = runtestsingle(rwflag, exclude_user, 1); if (ret) @@ -289,6 +829,19 @@ static int runtest(void) } ret = runtest_dar_outside(); + if (ret) + return ret; + + if (dawr && nr_wps > 1) { + nprocs = get_nprocs(); + ret = runtest_multi_dawr(); + if (ret) + return ret; + } + + if (dawr && arch_31) + ret = runtest_unaligned_512bytes(); + return ret; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 2e0d86e0687e..a0635a3819aa 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -194,6 +194,18 @@ static void test_workload(void) big_var[rand() % DAWR_MAX_LEN] = 'a'; else cvar = big_var[rand() % DAWR_MAX_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + cvar = gstruct.a[rand() % A_LEN]; } static void check_success(pid_t child_pid, const char *name, const char *type, @@ -417,6 +429,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_multi_sethwdebug_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED"; + char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.b; + len1 = A_LEN; + len2 = B_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name1, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name2, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + +static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid) +{ + struct ppc_hw_breakpoint 
info1, info2; + unsigned long wp_addr1, wp_addr2; + char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap"; + int len1, len2; + int wh1, wh2; + + wp_addr1 = (unsigned long)&gstruct.a; + wp_addr2 = (unsigned long)&gstruct.a; + len1 = A_LEN; + len2 = A_LEN; + get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1); + get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */ + wh1 = ptrace_sethwdebug(child_pid, &info1); + + /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */ + wh2 = ptrace_sethwdebug(child_pid, &info2); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr1, len1); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr2, len2); + + ptrace_delhwdebug(child_pid, wh1); + ptrace_delhwdebug(child_pid, wh2); +} + static void test_sethwdebug_range_unaligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -504,6 +579,10 @@ run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) test_sethwdebug_range_unaligned(child_pid); test_sethwdebug_range_unaligned_dar(child_pid); test_sethwdebug_dawr_max_range(child_pid); + if (dbginfo->num_data_bps > 1) { + test_multi_sethwdebug_range(child_pid); + test_multi_sethwdebug_range_dawr_overlap(child_pid); + } } } } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c new file mode 100644 index 000000000000..3344e74a97b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ptrace.h" + +char data[16]; + +/* Overlapping address range */ +volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; +volatile __u64 *perf_data1 = (__u64 *)&data[4]; + +/* Non-overlapping address range */ +volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; +volatile __u64 *perf_data2 = (__u64 *)&data[8]; + +static unsigned long pid_max_addr(void) +{ + FILE *fp; + char *line, *c; + char addr[100]; + size_t len = 0; + + fp = fopen("/proc/kallsyms", "r"); + if (!fp) { + printf("Failed to read /proc/kallsyms. Exiting..\n"); + exit(EXIT_FAILURE); + } + + while (getline(&line, &len, fp) != -1) { + if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || + strstr(line, "pid_max_min")) + continue; + + strncpy(addr, line, len < 100 ? len : 100); + c = strchr(addr, ' '); + *c = '\0'; + return strtoul(addr, &c, 16); + } + fclose(fp); + printf("Could not find pix_max. 
Exiting..\n"); + exit(EXIT_FAILURE); + return -1; +} + +static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = pid_max_addr(); + attr->bp_len = sizeof(unsigned long); + attr->exclude_user = 1; + attr->exclude_hv = 1; +} + +static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +{ + struct perf_event_attr attr; + + perf_user_event_attr_set(&attr, addr, len); + return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); +} + +static int perf_thread_kernel_event_open(pid_t child_pid) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); +} + +static int perf_cpu_kernel_event_open(int cpu) +{ + struct perf_event_attr attr; + + perf_kernel_event_attr_set(&attr); + return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); +} + +static int child(void) +{ + int ret; + + ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); + if (ret) { + printf("Error: PTRACE_TRACEME failed\n"); + return 0; + } + kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ + + return 0; +} + +static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + __u64 addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = addr; + info->addr2 = addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} + +static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +{ + struct ppc_hw_breakpoint info; + + ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); +} + +static int test1(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range overlaps) + * fail; + */ + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test2(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing cpu event by perf) + * if (addr range does not overlaps) + * allow; + */ + + perf_fd = perf_cpu_event_open(0, 
(__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test3(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd > 0 || errno != ENOSPC) + ret = -1; + + close(perf_fd); + return ret; +} + +static int test4(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the same thread) + * if (addr range does not overlaps) + * fail; + */ + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) + return -1; + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + +perf_close: + close(perf_fd); + return ret; +} + +static int test5(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by ptrace) + * if (existing thread event by perf on the different thread) + * allow; + */ + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) { + ret = -1; + goto perf_close; + } + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); +perf_close: + close(perf_fd); +kill_child: + kill(cpid, SIGINT); + return ret; +} + +static int test6(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread kernel event by perf) + * if (existing thread event by ptrace on the same thread) + * allow; + * -- OR -- + * if (new per cpu kernel event by perf) + * if (existing thread event by ptrace) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_kernel_event_open(child_pid); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + + perf_fd = perf_cpu_kernel_event_open(0); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test7(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, 
child_pid, 0, ptrace_fd); + return ret; +} + +static int test8(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlaps) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, + sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test9(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test10(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test11(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test12(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range overlaps) + * fail; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); + if (perf_fd > 0 || errno != ENOSPC) + ret = -1; + + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test13(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the same thread) + * if (addr range does not overlap) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); + if (ptrace_fd < 0) + return -1; + + perf_fd = 
perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); + if (perf_fd < 0) { + ret = -1; + goto ptrace_close; + } + close(perf_fd); + +ptrace_close: + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int test14(pid_t child_pid) +{ + int perf_fd; + int ptrace_fd; + int cpid; + int ret = 0; + + /* Test: + * if (new per thread and per cpu event by perf) + * if (existing thread event by ptrace on the other thread) + * allow; + */ + ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); + if (ptrace_fd < 0) + return -1; + + cpid = fork(); + if (!cpid) { + /* Temporary Child */ + pause(); + exit(EXIT_SUCCESS); + } + + perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, + sizeof(*perf_data1)); + if (perf_fd < 0) { + ret = -1; + goto kill_child; + } + close(perf_fd); + +kill_child: + kill(cpid, SIGINT); + ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + return ret; +} + +static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) +{ + int ret; + + ret = fun(arg); + if (ret) + printf("%s: Error\n", msg); + else + printf("%s: Ok\n", msg); + return ret; +} + +char *desc[14] = { + "perf cpu event -> ptrace thread event (Overlapping)", + "perf cpu event -> ptrace thread event (Non-overlapping)", + "perf thread event -> ptrace same thread event (Overlapping)", + "perf thread event -> ptrace same thread event (Non-overlapping)", + "perf thread event -> ptrace other thread event", + "ptrace thread event -> perf kernel event", + "ptrace thread event -> perf same thread event (Overlapping)", + "ptrace thread event -> perf same thread event (Non-overlapping)", + "ptrace thread event -> perf other thread event", + "ptrace thread event -> perf cpu event (Overlapping)", + "ptrace thread event -> perf cpu event (Non-overlapping)", + "ptrace thread event -> perf same thread & cpu event (Overlapping)", + "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", + "ptrace thread event -> perf other thread & cpu event", +}; + +static int test(pid_t child_pid) +{ + int ret = TEST_PASS; + + ret |= do_test(desc[0], test1, child_pid); + ret |= do_test(desc[1], test2, child_pid); + ret |= do_test(desc[2], test3, child_pid); + ret |= do_test(desc[3], test4, child_pid); + ret |= do_test(desc[4], test5, child_pid); + ret |= do_test(desc[5], test6, child_pid); + ret |= do_test(desc[6], test7, child_pid); + ret |= do_test(desc[7], test8, child_pid); + ret |= do_test(desc[8], test9, child_pid); + ret |= do_test(desc[9], test10, child_pid); + ret |= do_test(desc[10], test11, child_pid); + ret |= do_test(desc[11], test12, child_pid); + ret |= do_test(desc[12], test13, child_pid); + ret |= do_test(desc[13], test14, child_pid); + + return ret; +} + +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +{ + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); + exit(-1); + } +} + +static int ptrace_perf_hwbreak(void) +{ + int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; + + child_pid = fork(); + if (!child_pid) + return child(); + + /* parent */ + wait(NULL); /* <-- child (SIGUSR1) */ + + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps <= 1); + + ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); + SKIP_IF(ret < 0); + close(ret); + + ret = test(child_pid); + + ptrace(PTRACE_CONT, child_pid, NULL, 0); + return ret; +} + +int main(int argc, char *argv[]) +{ + return 
test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); +} diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index f25e854fe370..844d18cd5f93 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2 +TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include @@ -13,3 +13,4 @@ $(OUTPUT)/spectre_v2: CFLAGS += -m64 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S $(OUTPUT)/rfi_flush: flush_utils.c $(OUTPUT)/entry_flush: flush_utils.c +$(OUTPUT)/uaccess_flush: flush_utils.c diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c index 78cf914fa321..68ce377b205e 100644 --- a/tools/testing/selftests/powerpc/security/entry_flush.c +++ b/tools/testing/selftests/powerpc/security/entry_flush.c @@ -53,7 +53,7 @@ int entry_flush_test(void) entry_flush = entry_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c index 0c3c4c40c7fb..4d95965cb751 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.c +++ b/tools/testing/selftests/powerpc/security/flush_utils.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "utils.h" #include "flush_utils.h" @@ -35,6 +36,18 @@ void syscall_loop(char *p, unsigned long iterations, } } +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size) +{ + struct utsname utsname; + + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + uname(&utsname); + } +} + static void sigill_handler(int signr, siginfo_t *info, void *unused) { static int warned; diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h index 07a5eb301466..e1e68281f7ac 100644 --- a/tools/testing/selftests/powerpc/security/flush_utils.h +++ b/tools/testing/selftests/powerpc/security/flush_utils.h @@ -9,9 +9,16 @@ #define CACHELINE_SIZE 128 +#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \ + (PERF_COUNT_HW_CACHE_OP_READ << 8) | \ + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)) + void syscall_loop(char *p, unsigned long iterations, unsigned long zero_size); +void syscall_loop_uaccess(char *p, unsigned long iterations, + unsigned long zero_size); + void set_dscr(unsigned long val); #endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 7565fd786640..f73484a6470f 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -54,7 +54,7 @@ int rfi_flush_test(void) rfi_flush = rfi_flush_orig; - fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); FAIL_IF(fd < 0); p = (char *)memalign(zero_size, CACHELINE_SIZE); diff --git 
a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c new file mode 100644 index 000000000000..cf80f960e38a --- /dev/null +++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + * Copyright 2020 Canonical Ltd. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "utils.h" +#include "flush_utils.h" + +int uaccess_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_orig; + int entry_flush_orig; + int uaccess_flush, uaccess_flush_orig; + + SKIP_IF(geteuid() != 0); + + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (read_debugfs_file("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) { + perror("Unable to read powerpc/entry_flush debugfs file"); + SKIP_IF(1); + } + + if (rfi_flush_orig != 0) { + if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + FAIL_IF(1); + } + } + + if (entry_flush_orig != 0) { + if (write_debugfs_file("powerpc/entry_flush", 0) < 0) { + perror("error writing to powerpc/entry_flush debugfs file"); + FAIL_IF(1); + } + } + + uaccess_flush = uaccess_flush_orig; + + fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + // disable L1 prefetching + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when entry_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop_uaccess(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (uaccess_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>', + uaccess_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else { + printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n", + uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<', + uaccess_flush ? 
repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + } + + if (uaccess_flush == uaccess_flush_orig) { + uaccess_flush = !uaccess_flush_orig; + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush) < 0) { + perror("error writing to powerpc/uaccess_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) { + perror("unable to restore original value of powerpc/entry_flush debugfs file"); + return 1; + } + + if (write_debugfs_file("powerpc/uaccess_flush", uaccess_flush_orig) < 0) { + perror("unable to restore original value of powerpc/uaccess_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(uaccess_flush_test, "uaccess_flush_test"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index c75960af8018..11521077f915 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) /* Get thread endianness: extract bit LE from MSR */ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; - /*** + /* * Little-Endian Machine */ @@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc) } } - /*** + /* * Big-Endian Machine */ diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 7bf841a3967b..6b13dc277724 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -25,12 +25,20 @@ static void fill_function_pointers(void) if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso32.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso64.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { pr_err("[WARN]\tfailed to find vDSO\n"); return; } vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime"); if (!vdso_clock_gettime) pr_err("Warning: failed to find clock_gettime in vDSO\n");
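
For reference, the perf-hwbreak and ptrace-perf-hwbreak helpers added above (perf_event_attr_set(), perf_process_event_open(), perf_cpu_event_open()) all reduce to the same PERF_TYPE_BREAKPOINT pattern. The sketch below is an illustrative stand-alone program, not code taken from any patch in this series: the watched variable, the expected hit count and the minimal error handling are assumptions made for the example; only the perf_event_attr fields and the syscall/ioctl usage mirror what the selftests do.

  /* Illustrative sketch only; mirrors the selftest helpers, not copied from the patches. */
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/ioctl.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>
  #include <linux/hw_breakpoint.h>

  static volatile int watched;	/* assumed target variable for the watchpoint */

  int main(void)
  {
  	struct perf_event_attr attr;
  	unsigned long long hits = 0;
  	int fd, tmp;

  	/* Describe a read/write data breakpoint covering 'watched'. */
  	memset(&attr, 0, sizeof(attr));
  	attr.type = PERF_TYPE_BREAKPOINT;
  	attr.size = sizeof(attr);
  	attr.bp_type = HW_BREAKPOINT_RW;
  	attr.bp_addr = (__u64)(unsigned long)&watched;
  	attr.bp_len = sizeof(watched);		/* 4 bytes: HW_BREAKPOINT_LEN_4 */
  	attr.exclude_kernel = 1;
  	attr.exclude_hv = 1;
  	attr.disabled = 1;

  	/* Count hits for the current thread only: pid = 0, cpu = -1. */
  	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  	if (fd < 0) {
  		perror("perf_event_open");
  		return 1;
  	}

  	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
  	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
  	watched = 1;		/* write access to the watched variable */
  	tmp = watched;		/* read access */
  	(void)tmp;
  	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

  	/* With one write and one read, this typically reports two hits. */
  	if (read(fd, &hits, sizeof(hits)) != sizeof(hits))
  		perror("read");
  	printf("breakpoint hits: %llu\n", hits);
  	close(fd);
  	return 0;
  }

The selftests above vary exactly these knobs: bp_type (HW_BREAKPOINT_R/W/RW), bp_addr and bp_len (DAWR ranges up to 512 bytes), the exclude_* bits, and the pid/cpu arguments to perf_event_open(), which is how they cover the process-specific, systemwide and kernel-only cases that are then cross-checked against ptrace watchpoints.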