From 192b2e742c06af399e8eecb4a1726520bfccece8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 26 Feb 2018 13:17:07 +1100 Subject: [PATCH 01/15] selftests/powerpc: Skip tm-trap if transactional memory is not enabled Some processor revisions do not support transactional memory, and additionally kernel support can be disabled. In either case the tm-trap test should be skipped, otherwise it will fail with a SIGILL. Fixes: a08082f8e4e1 ("powerpc/selftests: Check endianness on trap in TM") Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/tm-trap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index 5d92c23ee6cb..179d592f0073 100644 --- a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -255,6 +255,8 @@ int tm_trap_test(void) struct sigaction trap_sa; + SKIP_IF(!have_htm()); + trap_sa.sa_flags = SA_SIGINFO; trap_sa.sa_sigaction = trap_signal_handler; sigaction(SIGTRAP, &trap_sa, NULL); From 64c3f648c25d108f346fdc96c15180c6b7d250e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 23 Feb 2018 12:55:59 -0800 Subject: [PATCH 02/15] powerpc/boot: Fix random libfdt related build errors Once in a while I see build errors similar to the following when building images from a clean tree. Building powerpc:virtex-ml507:44x/virtex5_defconfig ... failed ------------ Error log: arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error: libfdt.h: No such file or directory Building powerpc:bamboo:smpdev:44x/bamboo_defconfig ... failed ------------ Error log: arch/powerpc/boot/treeboot-akebono.c:37:20: fatal error: libfdt.h: No such file or directory arch/powerpc/boot/treeboot-currituck.c:35:20: fatal error: libfdt.h: No such file or directory Rebuilds will succeed. Turns out that several source files in arch/powerpc/boot/ include libfdt.h, but Makefile dependencies are incomplete. Let's fix that. Signed-off-by: Guenter Roeck Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index ef6549e57157..26d5d2a5b8e9 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ From b7abbd5a3533a31a1e7d4696ea275df543440c51 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 28 Feb 2018 15:15:56 +1100 Subject: [PATCH 03/15] selftests/powerpc: Fix missing clean of pmu/lib.o The tm-resched-dscr test links against pmu/lib.o, but we don't have a rule to clean pmu/lib.o. This can lead to a build break if you build for big endian and then little, or vice versa. Fix it by making tm-resched-dscr depend on pmu/lib.c, causing the code to be built directly in, meaning no .o is generated. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index a23453943ad2..5c72ff978f27 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -16,7 +16,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 -$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o +$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx $(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 From cd4a6f3ab4d80cb919d15897eb3cbc85c2009d4b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 26 Feb 2018 15:22:22 +1100 Subject: [PATCH 04/15] selftests/powerpc: Skip the subpage_prot tests if the syscall is unavailable The subpage_prot syscall is only functional when the system is using the Hash MMU. Since commit 5b2b80714796 ("powerpc/mm: Invalidate subpage_prot() system call on radix platforms") it returns ENOENT when the Radix MMU is active. Currently this just makes the test fail. Additionally the syscall is not available if the kernel is built with 4K pages, or if CONFIG_PPC_SUBPAGE_PROT=n, in which case it returns ENOSYS because the syscall is missing entirely. So check explicitly for ENOENT and ENOSYS and skip if we see either of those. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/subpage_prot.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 35ade7406dcd..3ae77ba93208 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size) return 0; } +static int syscall_available(void) +{ + int rc; + + errno = 0; + rc = syscall(__NR_subpage_prot, 0, 0, 0); + + return rc == 0 || (errno != ENOENT && errno != ENOSYS); +} + int test_anon(void) { unsigned long align; @@ -145,6 +155,8 @@ int test_anon(void) void *mallocblock; unsigned long mallocsize; + SKIP_IF(!syscall_available()); + if (getpagesize() != 0x10000) { fprintf(stderr, "Kernel page size must be 64K!\n"); return 1; @@ -180,6 +192,8 @@ int test_file(void) off_t filesize; int fd; + SKIP_IF(!syscall_available()); + fd = open(file_name, O_RDWR); if (fd == -1) { perror("failed to open file"); From 07c5ccd70ad702e561fcda8e4df494f098a42742 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 22 Feb 2018 15:17:38 +1100 Subject: [PATCH 05/15] ocxl: Add get_metadata IOCTL to share OCXL information to userspace Some required information is not exposed to userspace currently (eg. the PASID), pass this information back, along with other information which is currently communicated via sysfs, which saves some parsing effort in userspace. Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/ocxl/file.c | 27 +++++++++++++++++++++++++++ include/uapi/misc/ocxl.h | 17 +++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index 337462e1569f..038509e5d031 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -102,10 +102,32 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, return rc; } +static long afu_ioctl_get_metadata(struct ocxl_context *ctx, + struct ocxl_ioctl_metadata __user *uarg) +{ + struct ocxl_ioctl_metadata arg; + + memset(&arg, 0, sizeof(arg)); + + arg.version = 0; + + arg.afu_version_major = ctx->afu->config.version_major; + arg.afu_version_minor = ctx->afu->config.version_minor; + arg.pasid = ctx->pasid; + arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride; + arg.global_mmio_size = ctx->afu->config.global_mmio_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ + x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \ "UNKNOWN") static long afu_ioctl(struct file *file, unsigned int cmd, @@ -159,6 +181,11 @@ static long afu_ioctl(struct file *file, unsigned int cmd, irq_fd.eventfd); break; + case OCXL_IOCTL_GET_METADATA: + rc = afu_ioctl_get_metadata(ctx, + (struct ocxl_ioctl_metadata __user *) args); + break; + default: rc = -EINVAL; } diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h index 4b0b0b756f3e..0af83d80fb3e 100644 --- a/include/uapi/misc/ocxl.h +++ b/include/uapi/misc/ocxl.h @@ -32,6 +32,22 @@ struct ocxl_ioctl_attach { __u64 reserved3; }; +struct ocxl_ioctl_metadata { + __u16 version; // struct version, always backwards compatible + + // Version 0 fields + __u8 afu_version_major; + __u8 afu_version_minor; + __u32 pasid; // PASID assigned to the current context + + __u64 pp_mmio_size; // Per PASID MMIO size + __u64 global_mmio_size; + + // End version 0 fields + + __u64 reserved[13]; // Total of 16*u64 +}; + struct ocxl_ioctl_irq_fd { __u64 irq_offset; __s32 eventfd; @@ -45,5 +61,6 @@ struct ocxl_ioctl_irq_fd { #define OCXL_IOCTL_IRQ_ALLOC _IOR(OCXL_MAGIC, 0x11, __u64) #define OCXL_IOCTL_IRQ_FREE _IOW(OCXL_MAGIC, 0x12, __u64) #define OCXL_IOCTL_IRQ_SET_FD _IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd) +#define OCXL_IOCTL_GET_METADATA _IOR(OCXL_MAGIC, 0x14, struct ocxl_ioctl_metadata) #endif /* _UAPI_MISC_OCXL_H */ From e7666d046ac0eda535282a5fd3b188f31d0f4afd Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Thu, 22 Feb 2018 15:17:39 +1100 Subject: [PATCH 06/15] ocxl: Document the OCXL_IOCTL_GET_METADATA IOCTL Signed-off-by: Alastair D'Silva Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- Documentation/accelerators/ocxl.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/accelerators/ocxl.rst b/Documentation/accelerators/ocxl.rst index 4f7af841d935..ddcc58d01cfb 100644 --- a/Documentation/accelerators/ocxl.rst +++ b/Documentation/accelerators/ocxl.rst @@ -152,6 +152,11 @@ OCXL_IOCTL_IRQ_SET_FD: Associate an event fd to an AFU interrupt so that the user process can be notified when the AFU sends an interrupt. +OCXL_IOCTL_GET_METADATA: + + Obtains configuration information from the card, such at the size of + MMIO areas, the AFU version, and the PASID for the current context. + mmap ---- From b0c41b8b6e43120d7c35e4709508a3d90a09646e Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 6 Mar 2018 13:44:32 +0530 Subject: [PATCH 07/15] powerpc/pseries: Fix vector5 in ibm architecture vector table With ibm,dynamic-memory-v2 and ibm,drc-info coming around the same time, byte22 in vector5 of ibm architecture vector table got set twice separately. The end result is that guest kernel isn't advertising support for ibm,dynamic-memory-v2. Fix this by removing the duplicate assignment of byte22. Fixes: 02ef6dd8109b ("powerpc: Enable support for ibm,drc-info devtree property") Signed-off-by: Bharata B Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index d22c41c26bb3..acf4b2e0530c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,7 +874,6 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, - .byte22 = 0, }, /* option vector 6: IBM PAPR hints */ From e4b79900222b8cccd4da4a7a89581f0e1b764ed2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 13 Mar 2018 15:58:11 +1100 Subject: [PATCH 08/15] powerpc/64s: Fix NULL AT_BASE_PLATFORM when using DT CPU features When running virtualised the powerpc kernel is able to run the system in "compat mode" - which means the kernel and hardware are pretending to userspace that the CPU is an older version than it actually is. AT_BASE_PLATFORM is an AUXV entry that we export to userspace for use when we're running in that mode, which tells userspace the "platform" string for the real CPU version, as opposed to the faked version. Although we don't support compat mode when using DT CPU features, and arguably don't need to set AT_BASE_PLATFORM, the existing cputable based code always sets it even when we're running bare metal. That means the lack of AT_BASE_PLATFORM is a user-visible artifact of the fact that the kernel is using DT CPU features, which we don't want. So set it in the DT CPU features code also. This results in eg: $ LD_SHOW_AUXV=1 /bin/true | grep "AT_.*PLATFORM" AT_PLATFORM: power9 AT_BASE_PLATFORM:power9 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 945e2c29ad2d..0bcfb0f256e1 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -720,6 +720,9 @@ static void __init cpufeatures_setup_finished(void) cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; } + /* Make sure powerpc_base_platform is non-NULL */ + powerpc_base_platform = cur_cpu_spec->platform; + system_registers.lpcr = mfspr(SPRN_LPCR); system_registers.hfscr = mfspr(SPRN_HFSCR); system_registers.fscr = mfspr(SPRN_FSCR); From ff6781fd1bb404d8a551c02c35c70cec1da17ff1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Mar 2018 12:22:28 +1000 Subject: [PATCH 09/15] powerpc/64s: Fix lost pending interrupt due to race causing lost update to irq_happened force_external_irq_replay() can be called in the do_IRQ path with interrupts hard enabled and soft disabled if may_hard_irq_enable() set MSR[EE]=1. It updates local_paca->irq_happened with a load, modify, store sequence. If a maskable interrupt hits during this sequence, it will go to the masked handler to be marked pending in irq_happened. This update will be lost when the interrupt returns and the store instruction executes. This can result in unpredictable latencies, timeouts, lockups, etc. Fix this by ensuring hard interrupts are disabled before modifying irq_happened. This could cause any maskable asynchronous interrupt to get lost, but it was noticed on P9 SMP system doing RDMA NVMe target over 100GbE, so very high external interrupt rate and high IPI rate. The hang was bisected down to enabling doorbell interrupts for IPIs. These provided an interrupt type that could run at high rates in the do_IRQ path, stressing the race. Fixes: 1d607bb3bd60 ("powerpc/irq: Add mechanism to force a replay of interrupts") Cc: stable@vger.kernel.org # v4.8+ Reported-by: Carol L. Soto Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f88038847790..061aa0f47bb1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -476,6 +476,14 @@ void force_external_irq_replay(void) */ WARN_ON(!arch_irqs_disabled()); + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + /* Indicate in the PACA that we have an interrupt to replay */ local_paca->irq_happened |= PACA_IRQ_EE; } From aff6f8cb3e2170b9e58b0932bce7bfb492775e23 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 23 Mar 2018 09:29:05 +1100 Subject: [PATCH 10/15] powerpc/mm: Add tracking of the number of coprocessors using a context Currently, when using coprocessors (which use the Nest MMU), we simply increment the active_cpu count to force all TLB invalidations to be come broadcast. Unfortunately, due to an errata in POWER9, we will need to know more specifically that coprocessors are in use. This maintains a separate copros counter in the MMU context for that purpose. NB. The commit mentioned in the fixes tag below is not at fault for the bug we're fixing in this commit and the next, but this fix applies on top the infrastructure it introduced. Fixes: 03b8abedf4f4 ("cxl: Enable global TLBIs for cxl contexts") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Benjamin Herrenschmidt Tested-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 +++ arch/powerpc/include/asm/mmu_context.h | 18 +++++++++++++----- arch/powerpc/mm/mmu_context_book3s64.c | 1 + 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 0abeb0e2d616..37671feb2bf6 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -87,6 +87,9 @@ typedef struct { /* Number of bits in the mm_cpumask */ atomic_t active_cpus; + /* Number of users of the external (Nest) MMU */ + atomic_t copros; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 051b3d63afe3..3a15b6db9501 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -92,15 +92,23 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm) static inline void mm_context_add_copro(struct mm_struct *mm) { /* - * On hash, should only be called once over the lifetime of - * the context, as we can't decrement the active cpus count - * and flush properly for the time being. + * If any copro is in use, increment the active CPU count + * in order to force TLB invalidations to be global as to + * propagate to the Nest MMU. */ - inc_mm_active_cpus(mm); + if (atomic_inc_return(&mm->context.copros) == 1) + inc_mm_active_cpus(mm); } static inline void mm_context_remove_copro(struct mm_struct *mm) { + int c; + + c = atomic_dec_if_positive(&mm->context.copros); + + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + /* * Need to broadcast a global flush of the full mm before * decrementing active_cpus count, as the next TLBI may be @@ -111,7 +119,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) * for the time being. Invalidations will remain global if * used on hash. */ - if (radix_enabled()) { + if (c == 0 && radix_enabled()) { flush_all_mm(mm); dec_mm_active_cpus(mm); } diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 929d9ef7083f..3f980baade4c 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -173,6 +173,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm_iommu_init(mm); #endif atomic_set(&mm->context.active_cpus, 0); + atomic_set(&mm->context.copros, 0); return 0; } From 80a4ae202f2d319eced8bbf612a4e8b0f11c21f5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 23 Mar 2018 09:29:06 +1100 Subject: [PATCH 11/15] powerpc/mm: Workaround Nest MMU bug with TLB invalidations On POWER9 the Nest MMU may fail to invalidate some translations when doing a tlbie "by PID" or "by LPID" that is targeted at the TLB only and not the page walk cache. This works around it by forcing such invalidations to escalate to RIC=2 (full invalidation of TLB *and* PWC) when a coprocessor is in use for the context. Fixes: 03b8abedf4f4 ("cxl: Enable global TLBIs for cxl contexts") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Balbir Singh [balbirs: fixed spelling and coding style to quiesce checkpatch.pl] Tested-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 50 +++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 71d1b19ad1c0..8e4c6cb4a808 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -151,7 +151,23 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { asm volatile("ptesync": : :"memory"); - __tlbie_pid(pid, ric); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + } asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -311,6 +327,16 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); +static bool mm_needs_flush_escalation(struct mm_struct *mm) +{ + /* + * P9 nest MMU has issues with the page walk cache + * caching PTEs and not flushing them properly when + * RIC = 0 for a PID/LPID invalidate + */ + return atomic_read(&mm->context.copros) != 0; +} + #ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { @@ -321,9 +347,12 @@ void radix__flush_tlb_mm(struct mm_struct *mm) return; preempt_disable(); - if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_TLB); - else + if (!mm_is_thread_local(mm)) { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } else _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } @@ -435,10 +464,14 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } if (full) { - if (local) + if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); - else - _tlbie_pid(pid, RIC_FLUSH_TLB); + } else { + if (mm_needs_flush_escalation(mm)) + _tlbie_pid(pid, RIC_FLUSH_ALL); + else + _tlbie_pid(pid, RIC_FLUSH_TLB); + } } else { bool hflush = false; unsigned long hstart, hend; @@ -548,6 +581,9 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, } if (full) { + if (!local && mm_needs_flush_escalation(mm)) + also_pwc = true; + if (local) _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); else From 99491e2d0e50c53a0620dfec340b249d08f5968f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:25 +0530 Subject: [PATCH 12/15] powerpc/mm/radix: Remove unused code These function are not used in the code. Remove them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- .../include/asm/book3s/64/tlbflush-radix.h | 3 -- arch/powerpc/mm/tlb-radix.c | 40 ------------------- 2 files changed, 43 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 8eea90f80e45..19b45ba6caf9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -47,9 +47,6 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad #endif extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); -extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size); -extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 8e4c6cb4a808..e47ee8c867c5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -639,46 +639,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, - unsigned long page_size) -{ - unsigned long rb,rs,prs,r; - unsigned long ap; - unsigned long ric = RIC_FLUSH_TLB; - - ap = mmu_get_ap(radix_get_mmu_psize(page_size)); - rb = gpa & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid_va); - -void radix__flush_tlb_lpid(unsigned long lpid) -{ - unsigned long rb,rs,prs,r; - unsigned long ric = RIC_FLUSH_ALL; - - rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ - rs = lpid & ((1UL << 32) - 1); - prs = 0; /* partition scoped */ - r = 1; /* raidx format */ - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(lpid, 0, rb, rs, ric, prs, r); -} -EXPORT_SYMBOL(radix__flush_tlb_lpid); - void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { From 243fee3249ff78e5f7ab822139dc89719def82d2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:26 +0530 Subject: [PATCH 13/15] powerpc/mm/radix: Move the functions that does the actual tlbie closer No functionality change. Just code movement to ease code changes later Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 64 ++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index e47ee8c867c5..74354c26d316 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -119,6 +119,38 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -171,22 +203,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 1, rb, rs, ric, prs, r); -} - static inline void __tlbiel_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -219,22 +235,6 @@ static inline void _tlbiel_va_range(unsigned long start, unsigned long end, asm volatile("ptesync": : :"memory"); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static inline void __tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) From a5d4b5891c2f1f865a2def1eb0030f534e77ff86 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 23 Mar 2018 10:26:27 +0530 Subject: [PATCH 14/15] powerpc/mm: Fixup tlbie vs store ordering issue on POWER9 On POWER9, under some circumstances, a broadcast TLB invalidation might complete before all previous stores have drained, potentially allowing stale stores from becoming visible after the invalidation. This works around it by doubling up those TLB invalidations which was verified by HW to be sufficient to close the risk window. This will be documented in a yet-to-be-published errata. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Signed-off-by: Aneesh Kumar K.V [mpe: Enable the feature in the DT CPU features code for all Power9, rename the feature to CPU_FTR_P9_TLBIE_BUG per benh.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 3 ++- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 +++ arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 +++++++++++ arch/powerpc/mm/hash_native_64.c | 16 +++++++++++++++- arch/powerpc/mm/pgtable_64.c | 1 + arch/powerpc/mm/tlb-radix.c | 15 +++++++++++++++ 7 files changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index a2c5c95882cf..2e2bacbdf6ed 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -203,6 +203,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) @@ -465,7 +466,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ - CPU_FTR_PKEY) + CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 0bcfb0f256e1..8ca5d5b74618 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -709,6 +709,9 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; else if ((version & 0xffffefff) == 0x004e0201) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; + + if ((version & 0xffff0000) == 0x004e0000) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0c854816e653..0837b9738d76 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -157,6 +157,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 8888e625a999..e1c083fbe434 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -473,6 +473,17 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], kvm->arch.lpid, 0, 0, 0); } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rbvalues[0]), "r" (kvm->arch.lpid)); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a0675e91ad7d..656933c85925 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -201,6 +201,15 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -278,6 +287,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -771,7 +781,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -843,6 +853,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 28c980eb4422..adf469f312f2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -481,6 +481,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 74354c26d316..a07f5372a4bf 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -151,6 +151,17 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void fixup_tlbie(void) +{ + unsigned long pid = 0; + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. */ @@ -200,6 +211,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) default: __tlbie_pid(pid, RIC_FLUSH_ALL); } + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -253,6 +265,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("ptesync": : :"memory"); __tlbie_va(va, pid, ap, ric); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -264,6 +277,7 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, if (also_pwc) __tlbie_pid(pid, RIC_FLUSH_PWC); __tlbie_va_range(start, end, pid, page_size, psize); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -498,6 +512,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (hflush) __tlbie_va_range(hstart, hend, pid, HPAGE_PMD_SIZE, MMU_PAGE_2M); + fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } From 52396500f97c53860164debc7d4f759077853423 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 23 Mar 2018 15:53:38 +1000 Subject: [PATCH 15/15] powerpc/64s: Fix i-side SLB miss bad address handler saving nonvolatile GPRs The SLB bad address handler's trap number fixup does not preserve the low bit that indicates nonvolatile GPRs have not been saved. This leads save_nvgprs to skip saving them, and subsequent functions and return from interrupt will think they are saved. This causes kernel branch-to-garbage debugging to not have correct registers, can also cause userspace to have its registers clobbered after a segfault. Fixes: f0f558b131db ("powerpc/mm: Preserve CFAR value on SLB miss caused by access to bogus address") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3ac87e53b3da..1ecfd8ffb098 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -706,7 +706,7 @@ EXC_COMMON_BEGIN(bad_addr_slb) ld r3, PACA_EXSLB+EX_DAR(r13) std r3, _DAR(r1) beq cr6, 2f - li r10, 0x480 /* fix trap number for I-SLB miss */ + li r10, 0x481 /* fix trap number for I-SLB miss */ std r10, _TRAP(r1) 2: bl save_nvgprs addi r3, r1, STACK_FRAME_OVERHEAD