Merge branch 'fixes' into next

We have some dependencies & conflicts between patches in fixes and things to go in next, both in the radix TLB flush code and the IMC PMU driver. So merge fixes into next.
2017-11-10 20:55:03 +11:00 · 2017-11-10 20:55:03 +11:00 · a54c61f46e
parent 77fad8bfb1 7ecb37f62f
commit a54c61f46e
15 changed files with 125 additions and 60 deletions
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@ -83,16 +83,8 @@ static inline unsigned long ppc_function_entry(void *func)
 	 * On PPC64 ABIv1 the function pointer actually points to the
 	 * function's descriptor. The first entry in the descriptor is the
 	 * address of the function text.
-	 *
-	 * However, we may also receive pointer to an assembly symbol. To
-	 * detect that, we first check if the function pointer we receive
-	 * already points to kernel/module text and we only dereference it
-	 * if it doesn't.
 	 */
-	if (kernel_text_address((unsigned long)func))
-		return (unsigned long)func;
-	else
-		return ((func_descr_t *)func)->entry;
+	return ((func_descr_t *)func)->entry;
 #else
 	return (unsigned long)func;
 #endif
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@ -332,7 +332,7 @@ int fix_alignment(struct pt_regs *regs)
 	 * when pasting to a co-processor. Furthermore, paste_last is the
 	 * synchronisation point for preceding copy/paste sequences.
 	 */
-	if ((instr & 0xfc0006fe) == PPC_INST_COPY)
+	if ((instr & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
 		return -EIO;

 	r = analyse_instr(&op, regs, instr);
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void)
 	case PVR_POWER8:
 	case PVR_POWER8E:
 	case PVR_POWER8NVL:
-		__flush_tlb_power8(POWER8_TLB_SETS);
+		__flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL);
 		break;
 	case PVR_POWER9:
-		__flush_tlb_power9(POWER9_TLB_SETS_HASH);
+		__flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL);
 		break;
 	default:
 		pr_err("unknown CPU version for boot TLB flush\n");
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@ -605,7 +605,12 @@ NOKPROBE_SYMBOL(kprobe_fault_handler);

 unsigned long arch_deref_entry_point(void *entry)
 {
-	return ppc_global_function_entry(entry);
+#ifdef PPC64_ELF_ABI_v1
+	if (!kernel_text_address((unsigned long)entry))
+		return ppc_global_function_entry(entry);
+	else
+#endif
+		return (unsigned long)entry;
 }
 NOKPROBE_SYMBOL(arch_deref_entry_point);

--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@ -717,5 +717,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)

 long __machine_check_early_realmode_p9(struct pt_regs *regs)
 {
+	/*
+	 * On POWER9 DD2.1 and below, it's possible to get a machine check
+	 * caused by a paste instruction where only DSISR bit 25 is set. This
+	 * will result in the MCE handler seeing an unknown event and the kernel
+	 * crashing. An MCE that occurs like this is spurious, so we don't need
+	 * to do anything in terms of servicing it. If there is something that
+	 * needs to be serviced, the CPU will raise the MCE again with the
+	 * correct DSISR so that it can be serviced properly. So detect this
+	 * case and mark it as handled.
+	 */
+	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
+		return 1;
+
 	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
 }
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@ -904,9 +904,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif

-#ifdef CONFIG_PPC_64K_PAGES
-	init_mm.context.pte_frag = NULL;
-#endif
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(&init_mm);
 #endif
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub)
 	 *  - we have no stack frame and can not allocate one
 	 *  - LR points back to the original caller (in A)
 	 *  - CTR holds the new NIP in C
-	 *  - r0 & r12 are free
-	 *
-	 * r0 can't be used as the base register for a DS-form load or store, so
-	 * we temporarily shuffle r1 (stack pointer) into r0 and then put it back.
+	 *  - r0, r11 & r12 are free
 	 */
 livepatch_handler:
 	CURRENT_THREAD_INFO(r12, r1)

-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
 	/* Allocate 3 x 8 bytes */
-	ld	r1, TI_livepatch_sp(r12)
-	addi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
+	ld	r11, TI_livepatch_sp(r12)
+	addi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)

 	/* Save toc & real LR on livepatch stack */
-	std	r2,  -24(r1)
+	std	r2,  -24(r11)
 	mflr	r12
-	std	r12, -16(r1)
+	std	r12, -16(r11)

 	/* Store stack end marker */
 	lis     r12, STACK_END_MAGIC@h
 	ori     r12, r12, STACK_END_MAGIC@l
-	std	r12, -8(r1)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
+	std	r12, -8(r11)

 	/* Put ctr in r12 for global entry and branch there */
 	mfctr	r12
@ -216,36 +207,30 @@ livepatch_handler:

 	/*
 	 * Now we are returning from the patched function to the original
-	 * caller A. We are free to use r0 and r12, and we can use r2 until we
+	 * caller A. We are free to use r11, r12 and we can use r2 until we
 	 * restore it.
 	 */

 	CURRENT_THREAD_INFO(r12, r1)

-	/* Save stack pointer into r0 */
-	mr	r0, r1
-
-	ld	r1, TI_livepatch_sp(r12)
+	ld	r11, TI_livepatch_sp(r12)

 	/* Check stack marker hasn't been trashed */
 	lis     r2,  STACK_END_MAGIC@h
 	ori     r2,  r2, STACK_END_MAGIC@l
-	ld	r12, -8(r1)
+	ld	r12, -8(r11)
 1:	tdne	r12, r2
 	EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0

 	/* Restore LR & toc from livepatch stack */
-	ld	r12, -16(r1)
+	ld	r12, -16(r11)
 	mtlr	r12
-	ld	r2,  -24(r1)
+	ld	r2,  -24(r11)

 	/* Pop livepatch stack frame */
-	CURRENT_THREAD_INFO(r12, r0)
-	subi	r1, r1, 24
-	std	r1, TI_livepatch_sp(r12)
-
-	/* Restore real stack pointer */
-	mr	r1, r0
+	CURRENT_THREAD_INFO(r12, r1)
+	subi	r11, r11, 24
+	std	r11, TI_livepatch_sp(r12)

 	/* Return to original caller of live patched function */
 	blr
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@ -1699,11 +1699,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 * Logical instructions
 */
 		case 26:	/* cntlzw */
-			op->val = __builtin_clz((unsigned int) regs->gpr[rd]);
+			val = (unsigned int) regs->gpr[rd];
+			op->val = ( val ? __builtin_clz(val) : 32 );
 			goto logical_done;
 #ifdef __powerpc64__
 		case 58:	/* cntlzd */
-			op->val = __builtin_clzl(regs->gpr[rd]);
+			val = regs->gpr[rd];
+			op->val = ( val ? __builtin_clzl(val) : 64 );
 			goto logical_done;
 #endif
 		case 28:	/* and */
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@ -1477,7 +1477,6 @@ out:

 int arch_update_cpu_topology(void)
 {
-	lockdep_assert_cpus_held();
 	return numa_update_cpu_topology(true);
 }

--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@ -361,9 +361,9 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 			break;
 	}
 	wmb();
+	local_irq_restore(flags);
 	flush_tlb_kernel_range((unsigned long)page_address(start),
 			       (unsigned long)page_address(page));
-	local_irq_restore(flags);
 	return err;
 }

--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event)

 	/* Take the mutex lock for this node and then decrement the reference count */
 	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		/*
+		 * The scenario where this is true is, when perf session is
+		 * started, followed by offlining of all cpus in a given node.
+		 *
+		 * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
+		 * function set the ref->count to zero, if the cpu which is
+		 * about to offline is the last cpu in a given node and make
+		 * an OPAL call to disable the engine in that node.
+		 *
+		 */
+		mutex_unlock(&ref->lock);
+		return;
+	}
 	ref->refc--;
 	if (ref->refc == 0) {
 		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
@ -523,8 +537,8 @@ static int core_imc_mem_init(int cpu, int size)

 	/* We need only vbase for core counters */
 	mem_info->vbase = page_address(alloc_pages_node(phys_id,
-					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
-					  get_order(size)));
+					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+					  __GFP_NOWARN, get_order(size)));
 	if (!mem_info->vbase)
 		return -ENOMEM;

@ -593,6 +607,20 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
 	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
 		return 0;

+	/*
+	 * Check whether core_imc is registered. We could end up here
+	 * if the cpuhotplug callback registration fails. i.e, callback
+	 * invokes the offline path for all sucessfully registered cpus.
+	 * At this stage, core_imc pmu will not be registered and we
+	 * should return here.
+	 *
+	 * We return with a zero since this is not an offline failure.
+	 * And cpuhp_setup_state() returns the actual failure reason
+	 * to the caller, which inturn will call the cleanup routine.
+	 */
+	if (!core_imc_pmu->pmu.event_init)
+		return 0;
+
 	/* Find any online cpu in that core except the current "cpu" */
 	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);

@ -646,6 +674,20 @@ static void core_imc_counters_release(struct perf_event *event)
 		return;

 	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		/*
+		 * The scenario where this is true is, when perf session is
+		 * started, followed by offlining of all cpus in a given core.
+		 *
+		 * In the cpuhotplug offline path, ppc_core_imc_cpu_offline()
+		 * function set the ref->count to zero, if the cpu which is
+		 * about to offline is the last cpu in a given core and make
+		 * an OPAL call to disable the engine in that core.
+		 *
+		 */
+		mutex_unlock(&ref->lock);
+		return;
+	}
 	ref->refc--;
 	if (ref->refc == 0) {
 		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
@ -763,8 +805,8 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
 		 * free the memory in cpu offline path.
 		 */
 		local_mem = page_address(alloc_pages_node(phys_id,
-				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
-				  get_order(size)));
+				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				  __GFP_NOWARN, get_order(size)));
 		if (!local_mem)
 			return -ENOMEM;

@ -1076,7 +1118,7 @@ static int init_nest_pmu_ref(void)

 static void cleanup_all_core_imc_memory(void)
 {
-	int i, nr_cores = num_present_cpus() / threads_per_core;
+	int i, nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core);
 	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
 	int size = core_imc_pmu->counter_mem_size;

@ -1148,7 +1190,8 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 	}

 	/* Only free the attr_groups which are dynamically allocated  */
-	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+	if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
 	return;
@ -1183,7 +1226,7 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		if (!pmu_ptr->pmu.name)
 			return -ENOMEM;

-		nr_cores = num_present_cpus() / threads_per_core;
+		nr_cores = DIV_ROUND_UP(num_present_cpus(), threads_per_core);
 		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
 								GFP_KERNEL);

--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@ -273,7 +273,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static unsigned long pnv_memory_block_size(void)
 {
-	return 256UL * 1024 * 1024;
+	/*
+	 * We map the kernel linear region with 1GB large pages on radix. For
+	 * memory hot unplug to work our memory block size must be at least
+	 * this size.
+	 */
+	if (radix_enabled())
+		return 1UL * 1024 * 1024 * 1024;
+	else
+		return 256UL * 1024 * 1024;
 }
 #endif

--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@ -1402,6 +1402,14 @@ void xive_teardown_cpu(void)

 	if (xive_ops->teardown_cpu)
 		xive_ops->teardown_cpu(cpu, xc);
+
+#ifdef CONFIG_SMP
+	/* Get rid of IPI */
+	xive_cleanup_cpu_ipi(cpu, xc);
+#endif
+
+	/* Disable and free the queues */
+	xive_cleanup_cpu_queues(cpu, xc);
 }

 void xive_kexec_teardown_cpu(int secondary)
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@ -431,7 +431,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc)

 static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
+	if (!xc->hw_ipi)
+		return;
+
 	xive_irq_bitmap_free(xc->hw_ipi);
+	xc->hw_ipi = 0;
 }
 #endif /* CONFIG_SMP */

--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@ -219,8 +219,17 @@ int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)

 	down_read(&mm->mmap_sem);

-	for (dar = addr; dar < addr + size; dar += page_size) {
-		if (!vma || dar < vma->vm_start || dar > vma->vm_end) {
+	vma = find_vma(mm, addr);
+	if (!vma) {
+		pr_err("Can't find vma for addr %016llx\n", addr);
+		rc = -EFAULT;
+		goto out;
+	}
+	/* get the size of the pages allocated */
+	page_size = vma_kernel_pagesize(vma);
+
+	for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) {
+		if (dar < vma->vm_start || dar >= vma->vm_end) {
 			vma = find_vma(mm, addr);
 			if (!vma) {
 				pr_err("Can't find vma for addr %016llx\n", addr);