linux-stable/arch/powerpc/kernel/setup-common.c

1023 lines
25 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common boot and setup code for both 32-bit and 64-bit.
* Extracted from arch/powerpc/kernel/setup_64.c.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
*/
#undef DEBUG
#include <linux/export.h>
kernel.h: split out panic and oops helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out panic and oops helpers. There are several purposes of doing this: - dropping dependency in bug.h - dropping a loop by moving out panic_notifier.h - unload kernel.h from something which has its own domain At the same time convert users tree-wide to use new headers, although for the time being include new header back to kernel.h to avoid twisted indirected includes for existing users. [akpm@linux-foundation.org: thread_info.h needs limits.h] [andriy.shevchenko@linux.intel.com: ia64 fix] Link: https://lkml.kernel.org/r/20210520130557.55277-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20210511074137.33666-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org> Co-developed-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Mike Rapoport <rppt@linux.ibm.com> Acked-by: Corey Minyard <cminyard@mvista.com> Acked-by: Christian Brauner <christian.brauner@ubuntu.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Wei Liu <wei.liu@kernel.org> Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Sebastian Reichel <sre@kernel.org> Acked-by: Luis Chamberlain <mcgrof@kernel.org> Acked-by: Stephen Boyd <sboyd@kernel.org> Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Acked-by: Helge Deller <deller@gmx.de> # parisc Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-07-01 01:54:59 +00:00
#include <linux/panic_notifier.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/unistd.h>
#include <linux/seq_buf.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_irq.h>
#include <linux/hugetlb.h>
mm: reorder includes after introduction of linux/pgtable.h The replacement of <asm/pgrable.h> with <linux/pgtable.h> made the include of the latter in the middle of asm includes. Fix this up with the aid of the below script and manual adjustments here and there. import sys import re if len(sys.argv) is not 3: print "USAGE: %s <file> <header>" % (sys.argv[0]) sys.exit(1) hdr_to_move="#include <linux/%s>" % sys.argv[2] moved = False in_hdrs = False with open(sys.argv[1], "r") as f: lines = f.readlines() for _line in lines: line = _line.rstrip(' ') if line == hdr_to_move: continue if line.startswith("#include <linux/"): in_hdrs = True elif not moved and in_hdrs: moved = True print hdr_to_move print line Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Cain <bcain@codeaurora.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Ungerer <gerg@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Ley Foon Tan <ley.foon.tan@intel.com> Cc: Mark Salter <msalter@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Nick Hu <nickhu@andestech.com> Cc: Paul Walmsley <paul.walmsley@sifive.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vincent Chen <deanbo422@gmail.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Will Deacon <will@kernel.org> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Link: http://lkml.kernel.org/r/20200514170327.31389-4-rppt@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-06-09 04:32:42 +00:00
#include <linux/pgtable.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/processor.h>
#include <asm/vdso_datapage.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/btext.h>
#include <asm/nvram.h>
#include <asm/setup.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/serial.h>
#include <asm/cache.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/xmon.h>
#include <asm/cputhreads.h>
#include <mm/mmu_decl.h>
#include <asm/archrandom.h>
#include <asm/fadump.h>
#include <asm/udbg.h>
#include <asm/hugetlb.h>
#include <asm/livepatch.h>
#include <asm/mmu_context.h>
#include <asm/cpu_has_feature.h>
#include <asm/kasan.h>
#include <asm/mce.h>
#include "setup.h"
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
/* The main machine-dep calls structure
*/
struct machdep_calls ppc_md;
EXPORT_SYMBOL(ppc_md);
struct machdep_calls *machine_id;
EXPORT_SYMBOL(machine_id);
int boot_cpuid = -1;
EXPORT_SYMBOL_GPL(boot_cpuid);
int __initdata boot_core_hwid = -1;
powerpc/64: Move paca allocation to early_setup() The early paca and boot cpuid dance is complicated and currently does not quite work as expected for boot cpuid != 0 cases. early_init_devtree() currently allocates the paca_ptrs and boot cpuid paca, but until that returns and early_setup() calls setup_paca(), this thread is currently still executing with smp_processor_id() == 0. One problem this causes is the paca_ptrs[smp_processor_id()] pointer is poisoned, so valid_emergency_stack() (any backtrace) and any similar users will crash. Another is that the hardware id which is set here will not be returned by get_hard_smp_processor_id(smp_processor_id()), but it would work correctly for boot_cpuid == 0, which could lead to difficult to reproduce or find bugs. The hard id does not seem to be used by the rest of early_init_devtree(), it just looks like all this code might have been put here to allocate somewhere to store boot CPU hardware id while scanning the devtree. Rearrange things so the hwid is put in a global variable like boot_cpuid, and do all the paca allocation and boot paca setup in the 64-bit early_setup() after we have everything ready to go. The paca_ptrs[0] re-poisoning code in early_setup does not seem to have ever worked, because paca_ptrs[0] was never not-poisoned when boot_cpuid is not 0. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Fix build error on 32-bit] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20221216115930.2667772-4-npiggin@gmail.com
2022-12-16 11:59:29 +00:00
#ifdef CONFIG_PPC64
int boot_cpu_hwid = -1;
#endif
/*
* These are used in binfmt_elf.c to put aux entries on the stack
* for each elf executable being started.
*/
int dcache_bsize;
int icache_bsize;
/* Variables required to store legacy IO irq routing */
int of_i8042_kbd_irq;
EXPORT_SYMBOL_GPL(of_i8042_kbd_irq);
int of_i8042_aux_irq;
EXPORT_SYMBOL_GPL(of_i8042_aux_irq);
#ifdef __DO_IRQ_CANON
/* XXX should go elsewhere eventually */
int ppc_do_canonicalize_irqs;
EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
#endif
#ifdef CONFIG_CRASH_DUMP
/* This keeps a track of which one is the crashing cpu. */
int crashing_cpu = -1;
#endif
/* also used by kexec */
void machine_shutdown(void)
{
/*
* if fadump is active, cleanup the fadump registration before we
* shutdown.
*/
fadump_cleanup();
if (ppc_md.machine_shutdown)
ppc_md.machine_shutdown();
}
static void machine_hang(void)
{
pr_emerg("System Halted, OK to turn off power\n");
local_irq_disable();
while (1)
;
}
void machine_restart(char *cmd)
{
machine_shutdown();
if (ppc_md.restart)
ppc_md.restart(cmd);
smp_send_stop();
do_kernel_restart(cmd);
mdelay(1000);
machine_hang();
}
void machine_power_off(void)
{
machine_shutdown();
do_kernel_power_off();
smp_send_stop();
machine_hang();
}
/* Used by the G5 thermal driver */
EXPORT_SYMBOL_GPL(machine_power_off);
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
Bitmap patches for v6.0-rc1 This branch consists of: Qu Wenruo: lib: bitmap: fix the duplicated comments on bitmap_to_arr64() https://lore.kernel.org/lkml/0d85e1dbad52ad7fb5787c4432bdb36cbd24f632.1656063005.git.wqu@suse.com/ Alexander Lobakin: bitops: let optimize out non-atomic bitops on compile-time constants https://lore.kernel.org/lkml/20220624121313.2382500-1-alexandr.lobakin@intel.com/T/ Yury Norov: lib: cleanup bitmap-related headers https://lore.kernel.org/linux-arm-kernel/YtCVeOGLiQ4gNPSf@yury-laptop/T/#m305522194c4d38edfdaffa71fcaaf2e2ca00a961 Alexander Lobakin: x86/olpc: fix 'logical not is only applied to the left hand side' https://www.spinics.net/lists/kernel/msg4440064.html Yury Norov: lib/nodemask: inline wrappers around bitmap https://lore.kernel.org/all/20220723214537.2054208-1-yury.norov@gmail.com/ -----BEGIN PGP SIGNATURE----- iQGzBAABCgAdFiEEi8GdvG6xMhdgpu/4sUSA/TofvsgFAmLpVvwACgkQsUSA/Tof vsiAHgwAwS9pl8GJ+fKYnue2CYo9349d2oT6BBUs/Rv8uqYEa4QkpYsR7NS733TG pos0hhoRvSOzrUP4qppXUjfJ+NkzLgpnKFOeWfFoNAKlHuaaMRvF3Y0Q/P8g0/Kg HPWcCQLHyCH9Wjs3e2TTgRjxTrHuruD2VJ401/PX/lw0DicUhmev5mUFa10uwFkP ZJRprjoFn9HJ0Hk16pFZDi36d3YumhACOcWRiJdoBDrEPV3S6lm9EeOy/yHBNp5k 9bKj+RboeT2t70KaZcKv+M5j1nu0cAhl7kRkjcxcmGyimI0l82Vgq9yFxhGqvWg8 RnCrJ5EaO08FGCAKG9GEwzdiNa24Gdq5XZSpQA7JZHmhmchpnnlNenJicyv0gOQi abChZeWSEsyA+78l2+kk9nezfVKUOnKDEZQxBVTOyWsmZYxHZV94oam340VjQDaY 4/fETdOy/qqPIxnpxAeFGWxZjcVaYiYPLj7KLPMsB0aAAF7pZrem465vSfgbrE81 +gCdqrWd =4dTW -----END PGP SIGNATURE----- Merge tag 'bitmap-6.0-rc1' of https://github.com/norov/linux Pull bitmap updates from Yury Norov: - fix the duplicated comments on bitmap_to_arr64() (Qu Wenruo) - optimize out non-atomic bitops on compile-time constants (Alexander Lobakin) - cleanup bitmap-related headers (Yury Norov) - x86/olpc: fix 'logical not is only applied to the left hand side' (Alexander Lobakin) - lib/nodemask: inline wrappers around bitmap (Yury Norov) * tag 'bitmap-6.0-rc1' of https://github.com/norov/linux: (26 commits) lib/nodemask: inline next_node_in() and node_random() powerpc: drop dependency on <asm/machdep.h> in archrandom.h x86/olpc: fix 'logical not is only applied to the left hand side' lib/cpumask: move some one-line wrappers to header file headers/deps: mm: align MANITAINERS and Docs with new gfp.h structure headers/deps: mm: Split <linux/gfp_types.h> out of <linux/gfp.h> headers/deps: mm: Optimize <linux/gfp.h> header dependencies lib/cpumask: move trivial wrappers around find_bit to the header lib/cpumask: change return types to unsigned where appropriate cpumask: change return types to bool where appropriate lib/bitmap: change type of bitmap_weight to unsigned long lib/bitmap: change return types to bool where appropriate arm: align find_bit declarations with generic kernel iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE) lib/test_bitmap: test the tail after bitmap_to_arr64() lib/bitmap: fix off-by-one in bitmap_to_arr64() lib: test_bitmap: add compile-time optimization/evaluations assertions bitmap: don't assume compiler evaluates small mem*() builtins calls net/ice: fix initializing the bitmap in the switch code bitops: let optimize out non-atomic bitops on compile-time constants ...
2022-08-08 00:52:35 +00:00
size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
powerpc: drop dependency on <asm/machdep.h> in archrandom.h archrandom.h includes <asm/machdep.h> to refer ppc_md. This causes circular header dependency, if generic nodemask.h includes random.h: In file included from include/linux/cred.h:16, from include/linux/seq_file.h:13, from arch/powerpc/include/asm/machdep.h:6, from arch/powerpc/include/asm/archrandom.h:5, from include/linux/random.h:109, from include/linux/nodemask.h:97, from include/linux/list_lru.h:12, from include/linux/fs.h:13, from include/linux/compat.h:17, from arch/powerpc/kernel/asm-offsets.c:12: include/linux/sched.h:1203:9: error: unknown type name 'nodemask_t' 1203 | nodemask_t mems_allowed; | ^~~~~~~~~~ Fix it by removing <asm/machdep.h> dependency from archrandom.h Now as arch_get_random_seed_long() moved to c-file, and not exported, it's not available for modules. As Michael Ellerman says: I think we actually don't need it exported to modules, I think it's a private detail of the RNG <-> architecture interface, not something that modules should be calling. CC: Andy Shevchenko <andriy.shevchenko@linux.intel.com> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org> CC: Michael Ellerman <mpe@ellerman.id.au> CC: Paul Mackerras <paulus@samba.org> CC: Rasmus Villemoes <linux@rasmusvillemoes.dk> CC: Stephen Rothwell <sfr@canb.auug.org.au> CC: linuxppc-dev@lists.ozlabs.org Suggested-by: Michael Ellerman <mpe@ellerman.id.au> (for non-exporting) Acked-by: Michael Ellerman <mpe@ellerman.id.au> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Yury Norov <yury.norov@gmail.com>
2022-07-23 21:45:36 +00:00
{
Bitmap patches for v6.0-rc1 This branch consists of: Qu Wenruo: lib: bitmap: fix the duplicated comments on bitmap_to_arr64() https://lore.kernel.org/lkml/0d85e1dbad52ad7fb5787c4432bdb36cbd24f632.1656063005.git.wqu@suse.com/ Alexander Lobakin: bitops: let optimize out non-atomic bitops on compile-time constants https://lore.kernel.org/lkml/20220624121313.2382500-1-alexandr.lobakin@intel.com/T/ Yury Norov: lib: cleanup bitmap-related headers https://lore.kernel.org/linux-arm-kernel/YtCVeOGLiQ4gNPSf@yury-laptop/T/#m305522194c4d38edfdaffa71fcaaf2e2ca00a961 Alexander Lobakin: x86/olpc: fix 'logical not is only applied to the left hand side' https://www.spinics.net/lists/kernel/msg4440064.html Yury Norov: lib/nodemask: inline wrappers around bitmap https://lore.kernel.org/all/20220723214537.2054208-1-yury.norov@gmail.com/ -----BEGIN PGP SIGNATURE----- iQGzBAABCgAdFiEEi8GdvG6xMhdgpu/4sUSA/TofvsgFAmLpVvwACgkQsUSA/Tof vsiAHgwAwS9pl8GJ+fKYnue2CYo9349d2oT6BBUs/Rv8uqYEa4QkpYsR7NS733TG pos0hhoRvSOzrUP4qppXUjfJ+NkzLgpnKFOeWfFoNAKlHuaaMRvF3Y0Q/P8g0/Kg HPWcCQLHyCH9Wjs3e2TTgRjxTrHuruD2VJ401/PX/lw0DicUhmev5mUFa10uwFkP ZJRprjoFn9HJ0Hk16pFZDi36d3YumhACOcWRiJdoBDrEPV3S6lm9EeOy/yHBNp5k 9bKj+RboeT2t70KaZcKv+M5j1nu0cAhl7kRkjcxcmGyimI0l82Vgq9yFxhGqvWg8 RnCrJ5EaO08FGCAKG9GEwzdiNa24Gdq5XZSpQA7JZHmhmchpnnlNenJicyv0gOQi abChZeWSEsyA+78l2+kk9nezfVKUOnKDEZQxBVTOyWsmZYxHZV94oam340VjQDaY 4/fETdOy/qqPIxnpxAeFGWxZjcVaYiYPLj7KLPMsB0aAAF7pZrem465vSfgbrE81 +gCdqrWd =4dTW -----END PGP SIGNATURE----- Merge tag 'bitmap-6.0-rc1' of https://github.com/norov/linux Pull bitmap updates from Yury Norov: - fix the duplicated comments on bitmap_to_arr64() (Qu Wenruo) - optimize out non-atomic bitops on compile-time constants (Alexander Lobakin) - cleanup bitmap-related headers (Yury Norov) - x86/olpc: fix 'logical not is only applied to the left hand side' (Alexander Lobakin) - lib/nodemask: inline wrappers around bitmap (Yury Norov) * tag 'bitmap-6.0-rc1' of https://github.com/norov/linux: (26 commits) lib/nodemask: inline next_node_in() and node_random() powerpc: drop dependency on <asm/machdep.h> in archrandom.h x86/olpc: fix 'logical not is only applied to the left hand side' lib/cpumask: move some one-line wrappers to header file headers/deps: mm: align MANITAINERS and Docs with new gfp.h structure headers/deps: mm: Split <linux/gfp_types.h> out of <linux/gfp.h> headers/deps: mm: Optimize <linux/gfp.h> header dependencies lib/cpumask: move trivial wrappers around find_bit to the header lib/cpumask: change return types to unsigned where appropriate cpumask: change return types to bool where appropriate lib/bitmap: change type of bitmap_weight to unsigned long lib/bitmap: change return types to bool where appropriate arm: align find_bit declarations with generic kernel iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE) lib/test_bitmap: test the tail after bitmap_to_arr64() lib/bitmap: fix off-by-one in bitmap_to_arr64() lib: test_bitmap: add compile-time optimization/evaluations assertions bitmap: don't assume compiler evaluates small mem*() builtins calls net/ice: fix initializing the bitmap in the switch code bitops: let optimize out non-atomic bitops on compile-time constants ...
2022-08-08 00:52:35 +00:00
if (max_longs && ppc_md.get_random_seed && ppc_md.get_random_seed(v))
return 1;
return 0;
powerpc: drop dependency on <asm/machdep.h> in archrandom.h archrandom.h includes <asm/machdep.h> to refer ppc_md. This causes circular header dependency, if generic nodemask.h includes random.h: In file included from include/linux/cred.h:16, from include/linux/seq_file.h:13, from arch/powerpc/include/asm/machdep.h:6, from arch/powerpc/include/asm/archrandom.h:5, from include/linux/random.h:109, from include/linux/nodemask.h:97, from include/linux/list_lru.h:12, from include/linux/fs.h:13, from include/linux/compat.h:17, from arch/powerpc/kernel/asm-offsets.c:12: include/linux/sched.h:1203:9: error: unknown type name 'nodemask_t' 1203 | nodemask_t mems_allowed; | ^~~~~~~~~~ Fix it by removing <asm/machdep.h> dependency from archrandom.h Now as arch_get_random_seed_long() moved to c-file, and not exported, it's not available for modules. As Michael Ellerman says: I think we actually don't need it exported to modules, I think it's a private detail of the RNG <-> architecture interface, not something that modules should be calling. CC: Andy Shevchenko <andriy.shevchenko@linux.intel.com> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org> CC: Michael Ellerman <mpe@ellerman.id.au> CC: Paul Mackerras <paulus@samba.org> CC: Rasmus Villemoes <linux@rasmusvillemoes.dk> CC: Stephen Rothwell <sfr@canb.auug.org.au> CC: linuxppc-dev@lists.ozlabs.org Suggested-by: Michael Ellerman <mpe@ellerman.id.au> (for non-exporting) Acked-by: Michael Ellerman <mpe@ellerman.id.au> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Yury Norov <yury.norov@gmail.com>
2022-07-23 21:45:36 +00:00
}
Bitmap patches for v6.0-rc1 This branch consists of: Qu Wenruo: lib: bitmap: fix the duplicated comments on bitmap_to_arr64() https://lore.kernel.org/lkml/0d85e1dbad52ad7fb5787c4432bdb36cbd24f632.1656063005.git.wqu@suse.com/ Alexander Lobakin: bitops: let optimize out non-atomic bitops on compile-time constants https://lore.kernel.org/lkml/20220624121313.2382500-1-alexandr.lobakin@intel.com/T/ Yury Norov: lib: cleanup bitmap-related headers https://lore.kernel.org/linux-arm-kernel/YtCVeOGLiQ4gNPSf@yury-laptop/T/#m305522194c4d38edfdaffa71fcaaf2e2ca00a961 Alexander Lobakin: x86/olpc: fix 'logical not is only applied to the left hand side' https://www.spinics.net/lists/kernel/msg4440064.html Yury Norov: lib/nodemask: inline wrappers around bitmap https://lore.kernel.org/all/20220723214537.2054208-1-yury.norov@gmail.com/ -----BEGIN PGP SIGNATURE----- iQGzBAABCgAdFiEEi8GdvG6xMhdgpu/4sUSA/TofvsgFAmLpVvwACgkQsUSA/Tof vsiAHgwAwS9pl8GJ+fKYnue2CYo9349d2oT6BBUs/Rv8uqYEa4QkpYsR7NS733TG pos0hhoRvSOzrUP4qppXUjfJ+NkzLgpnKFOeWfFoNAKlHuaaMRvF3Y0Q/P8g0/Kg HPWcCQLHyCH9Wjs3e2TTgRjxTrHuruD2VJ401/PX/lw0DicUhmev5mUFa10uwFkP ZJRprjoFn9HJ0Hk16pFZDi36d3YumhACOcWRiJdoBDrEPV3S6lm9EeOy/yHBNp5k 9bKj+RboeT2t70KaZcKv+M5j1nu0cAhl7kRkjcxcmGyimI0l82Vgq9yFxhGqvWg8 RnCrJ5EaO08FGCAKG9GEwzdiNa24Gdq5XZSpQA7JZHmhmchpnnlNenJicyv0gOQi abChZeWSEsyA+78l2+kk9nezfVKUOnKDEZQxBVTOyWsmZYxHZV94oam340VjQDaY 4/fETdOy/qqPIxnpxAeFGWxZjcVaYiYPLj7KLPMsB0aAAF7pZrem465vSfgbrE81 +gCdqrWd =4dTW -----END PGP SIGNATURE----- Merge tag 'bitmap-6.0-rc1' of https://github.com/norov/linux Pull bitmap updates from Yury Norov: - fix the duplicated comments on bitmap_to_arr64() (Qu Wenruo) - optimize out non-atomic bitops on compile-time constants (Alexander Lobakin) - cleanup bitmap-related headers (Yury Norov) - x86/olpc: fix 'logical not is only applied to the left hand side' (Alexander Lobakin) - lib/nodemask: inline wrappers around bitmap (Yury Norov) * tag 'bitmap-6.0-rc1' of https://github.com/norov/linux: (26 commits) lib/nodemask: inline next_node_in() and node_random() powerpc: drop dependency on <asm/machdep.h> in archrandom.h x86/olpc: fix 'logical not is only applied to the left hand side' lib/cpumask: move some one-line wrappers to header file headers/deps: mm: align MANITAINERS and Docs with new gfp.h structure headers/deps: mm: Split <linux/gfp_types.h> out of <linux/gfp.h> headers/deps: mm: Optimize <linux/gfp.h> header dependencies lib/cpumask: move trivial wrappers around find_bit to the header lib/cpumask: change return types to unsigned where appropriate cpumask: change return types to bool where appropriate lib/bitmap: change type of bitmap_weight to unsigned long lib/bitmap: change return types to bool where appropriate arm: align find_bit declarations with generic kernel iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE) lib/test_bitmap: test the tail after bitmap_to_arr64() lib/bitmap: fix off-by-one in bitmap_to_arr64() lib: test_bitmap: add compile-time optimization/evaluations assertions bitmap: don't assume compiler evaluates small mem*() builtins calls net/ice: fix initializing the bitmap in the switch code bitops: let optimize out non-atomic bitops on compile-time constants ...
2022-08-08 00:52:35 +00:00
EXPORT_SYMBOL(arch_get_random_seed_longs);
powerpc: drop dependency on <asm/machdep.h> in archrandom.h archrandom.h includes <asm/machdep.h> to refer ppc_md. This causes circular header dependency, if generic nodemask.h includes random.h: In file included from include/linux/cred.h:16, from include/linux/seq_file.h:13, from arch/powerpc/include/asm/machdep.h:6, from arch/powerpc/include/asm/archrandom.h:5, from include/linux/random.h:109, from include/linux/nodemask.h:97, from include/linux/list_lru.h:12, from include/linux/fs.h:13, from include/linux/compat.h:17, from arch/powerpc/kernel/asm-offsets.c:12: include/linux/sched.h:1203:9: error: unknown type name 'nodemask_t' 1203 | nodemask_t mems_allowed; | ^~~~~~~~~~ Fix it by removing <asm/machdep.h> dependency from archrandom.h Now as arch_get_random_seed_long() moved to c-file, and not exported, it's not available for modules. As Michael Ellerman says: I think we actually don't need it exported to modules, I think it's a private detail of the RNG <-> architecture interface, not something that modules should be calling. CC: Andy Shevchenko <andriy.shevchenko@linux.intel.com> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org> CC: Michael Ellerman <mpe@ellerman.id.au> CC: Paul Mackerras <paulus@samba.org> CC: Rasmus Villemoes <linux@rasmusvillemoes.dk> CC: Stephen Rothwell <sfr@canb.auug.org.au> CC: linuxppc-dev@lists.ozlabs.org Suggested-by: Michael Ellerman <mpe@ellerman.id.au> (for non-exporting) Acked-by: Michael Ellerman <mpe@ellerman.id.au> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Yury Norov <yury.norov@gmail.com>
2022-07-23 21:45:36 +00:00
void machine_halt(void)
{
machine_shutdown();
if (ppc_md.halt)
ppc_md.halt();
smp_send_stop();
machine_hang();
}
#ifdef CONFIG_SMP
DEFINE_PER_CPU(unsigned int, cpu_pvr);
#endif
static void show_cpuinfo_summary(struct seq_file *m)
{
struct device_node *root;
const char *model = NULL;
unsigned long bogosum = 0;
int i;
if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
for_each_online_cpu(i)
bogosum += loops_per_jiffy;
seq_printf(m, "total bogomips\t: %lu.%02lu\n",
bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
}
seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
if (ppc_md.name)
seq_printf(m, "platform\t: %s\n", ppc_md.name);
root = of_find_node_by_path("/");
if (root)
model = of_get_property(root, "model", NULL);
if (model)
seq_printf(m, "model\t\t: %s\n", model);
of_node_put(root);
if (ppc_md.show_cpuinfo != NULL)
ppc_md.show_cpuinfo(m);
/* Display the amount of memory */
if (IS_ENABLED(CONFIG_PPC32))
seq_printf(m, "Memory\t\t: %d MB\n",
(unsigned int)(total_memory / (1024 * 1024)));
}
static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
unsigned int pvr;
unsigned long proc_freq;
unsigned short maj;
unsigned short min;
#ifdef CONFIG_SMP
pvr = per_cpu(cpu_pvr, cpu_id);
#else
pvr = mfspr(SPRN_PVR);
#endif
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id);
if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
seq_puts(m, cur_cpu_spec->cpu_name);
else
seq_printf(m, "unknown (%08x)", pvr);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
seq_puts(m, ", altivec supported");
seq_putc(m, '\n');
#ifdef CONFIG_TAU
if (cpu_has_feature(CPU_FTR_TAU)) {
if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
/* more straightforward, but potentially misleading */
seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
cpu_temp(cpu_id));
} else {
/* show the actual temp sensor range */
u32 temp;
temp = cpu_temp_both(cpu_id);
seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
temp & 0xff, temp >> 16);
}
}
#endif /* CONFIG_TAU */
/*
* Platforms that have variable clock rates, should implement
* the method ppc_md.get_proc_freq() that reports the clock
* rate of a given cpu. The rest can use ppc_proc_freq to
* report the clock rate that is same across all cpus.
*/
if (ppc_md.get_proc_freq)
proc_freq = ppc_md.get_proc_freq(cpu_id);
else
proc_freq = ppc_proc_freq;
if (proc_freq)
seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
proc_freq / 1000000, proc_freq % 1000000);
/* If we are a Freescale core do a simple check so
* we don't have to keep adding cases in the future */
if (PVR_VER(pvr) & 0x8000) {
switch (PVR_VER(pvr)) {
case 0x8000: /* 7441/7450/7451, Voyager */
case 0x8001: /* 7445/7455, Apollo 6 */
case 0x8002: /* 7447/7457, Apollo 7 */
case 0x8003: /* 7447A, Apollo 7 PM */
case 0x8004: /* 7448, Apollo 8 */
case 0x800c: /* 7410, Nitro */
maj = ((pvr >> 8) & 0xF);
min = PVR_MIN(pvr);
break;
default: /* e500/book-e */
maj = PVR_MAJ(pvr);
min = PVR_MIN(pvr);
break;
}
} else {
switch (PVR_VER(pvr)) {
case 0x1008: /* 740P/750P ?? */
maj = ((pvr >> 8) & 0xFF) - 1;
min = pvr & 0xFF;
break;
case 0x004e: /* POWER9 bits 12-15 give chip type */
case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
maj = (pvr >> 8) & 0x0F;
min = pvr & 0xFF;
break;
default:
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
break;
}
}
seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
maj, min, PVR_VER(pvr), PVR_REV(pvr));
if (IS_ENABLED(CONFIG_PPC32))
seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100);
seq_putc(m, '\n');
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0) /* just in case, cpu 0 is not the first */
*pos = cpumask_first(cpu_online_mask);
else
*pos = cpumask_next(*pos - 1, cpu_online_mask);
if ((*pos) < nr_cpu_ids)
return (void *)(unsigned long)(*pos + 1);
return NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return c_start(m, pos);
}
static void c_stop(struct seq_file *m, void *v)
{
}
const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
.show = show_cpuinfo,
};
void __init check_for_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
DBG(" -> check_for_initrd() initrd_start=0x%lx initrd_end=0x%lx\n",
initrd_start, initrd_end);
/* If we were passed an initrd, set the ROOT_DEV properly if the values
* look sensible. If not, clear initrd reference.
*/
if (is_kernel_addr(initrd_start) && is_kernel_addr(initrd_end) &&
initrd_end > initrd_start)
ROOT_DEV = Root_RAM0;
else
initrd_start = initrd_end = 0;
if (initrd_start)
pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
DBG(" <- check_for_initrd()\n");
#endif /* CONFIG_BLK_DEV_INITRD */
}
#ifdef CONFIG_SMP
powerpc/64s: Reimplement book3s idle code in C Reimplement Book3S idle code in C, moving POWER7/8/9 implementation speific HV idle code to the powernv platform code. Book3S assembly stubs are kept in common code and used only to save the stack frame and non-volatile GPRs before executing architected idle instructions, and restoring the stack and reloading GPRs then returning to C after waking from idle. The complex logic dealing with threads and subcores, locking, SPRs, HMIs, timebase resync, etc., is all done in C which makes it more maintainable. This is not a strict translation to C code, there are some significant differences: - Idle wakeup no longer uses the ->cpu_restore call to reinit SPRs, but saves and restores them itself. - The optimisation where EC=ESL=0 idle modes did not have to save GPRs or change MSR is restored, because it's now simple to do. ESL=1 sleeps that do not lose GPRs can use this optimization too. - KVM secondary entry and cede is now more of a call/return style rather than branchy. nap_state_lost is not required because KVM always returns via NVGPR restoring path. - KVM secondary wakeup from offline sequence is moved entirely into the offline wakeup, which avoids a hwsync in the normal idle wakeup path. Performance measured with context switch ping-pong on different threads or cores, is possibly improved a small amount, 1-3% depending on stop state and core vs thread test for shallow states. Deep states it's in the noise compared with other latencies. KVM improvements: - Idle sleepers now always return to caller rather than branch out to KVM first. - This allows optimisations like very fast return to caller when no state has been lost. - KVM no longer requires nap_state_lost because it controls NVGPR save/restore itself on the way in and out. - The heavy idle wakeup KVM request check can be moved out of the normal host idle code and into the not-performance-critical offline code. - KVM nap code now returns from where it is called, which makes the flow a bit easier to follow. Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Squash the KVM changes in] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2019-04-12 14:30:52 +00:00
int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
cpumask_t threads_core_mask __read_mostly;
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:21:34 +00:00
EXPORT_SYMBOL_GPL(threads_per_core);
EXPORT_SYMBOL_GPL(threads_per_subcore);
KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:21:34 +00:00
EXPORT_SYMBOL_GPL(threads_shift);
EXPORT_SYMBOL_GPL(threads_core_mask);
static void __init cpu_init_thread_core_maps(int tpc)
{
int i;
threads_per_core = tpc;
threads_per_subcore = tpc;
cpumask_clear(&threads_core_mask);
/* This implementation only supports power of 2 number of threads
* for simplicity and performance
*/
threads_shift = ilog2(tpc);
BUG_ON(tpc != (1 << threads_shift));
for (i = 0; i < tpc; i++)
cpumask_set_cpu(i, &threads_core_mask);
printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
tpc, tpc > 1 ? "s" : "");
printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
}
u32 *cpu_to_phys_id = NULL;
static int assign_threads(unsigned int cpu, unsigned int nthreads, bool present,
const __be32 *hw_ids)
{
for (int i = 0; i < nthreads && cpu < nr_cpu_ids; i++) {
__be32 hwid;
hwid = be32_to_cpu(hw_ids[i]);
DBG(" thread %d -> cpu %d (hard id %d)\n", i, cpu, hwid);
set_cpu_present(cpu, present);
set_cpu_possible(cpu, true);
cpu_to_phys_id[cpu] = hwid;
cpu++;
}
return cpu;
}
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
* cpu_present_mask
*
* Having the possible map set up early allows us to restrict allocations
* of things like irqstacks to nr_cpu_ids rather than NR_CPUS.
*
* We do not initialize the online map here; cpus set their own bits in
* cpu_online_mask as they come up.
*
* This function is valid only for Open Firmware systems. finish_device_tree
* must be called before using this.
*
* While we're here, we may as well set the "physical" cpu ids in the paca.
*
* NOTE: This must match the parsing done in early_init_dt_scan_cpus.
*/
void __init smp_setup_cpu_maps(void)
{
struct device_node *dn;
int cpu = 0;
int nthreads = 1;
DBG("smp_setup_cpu_maps()\n");
arch: simplify several early memory allocations There are several early memory allocations in arch/ code that use memblock_phys_alloc() to allocate memory, convert the returned physical address to the virtual address and then set the allocated memory to zero. Exactly the same behaviour can be achieved simply by calling memblock_alloc(): it allocates the memory in the same way as memblock_phys_alloc(), then it performs the phys_to_virt() conversion and clears the allocated memory. Replace the longer sequence with a simpler call to memblock_alloc(). Link: http://lkml.kernel.org/r/1546248566-14910-6-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Christoph Hellwig <hch@infradead.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Greentime Hu <green.hu@gmail.com> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jonas Bonn <jonas@southpole.se> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Hocko <mhocko@suse.com> Cc: Michal Simek <michal.simek@xilinx.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Mackerras <paulus@samba.org> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> Cc: Vincent Chen <deanbo422@gmail.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-08 00:31:06 +00:00
cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32),
__alignof__(u32));
treewide: add checks for the return value of memblock_alloc*() Add check for the return value of memblock_alloc*() functions and call panic() in case of error. The panic message repeats the one used by panicing memblock allocators with adjustment of parameters to include only relevant ones. The replacement was mostly automated with semantic patches like the one below with manual massaging of format strings. @@ expression ptr, size, align; @@ ptr = memblock_alloc(size, align); + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, size, align); [anders.roxell@linaro.org: use '%pa' with 'phys_addr_t' type] Link: http://lkml.kernel.org/r/20190131161046.21886-1-anders.roxell@linaro.org [rppt@linux.ibm.com: fix format strings for panics after memblock_alloc] Link: http://lkml.kernel.org/r/1548950940-15145-1-git-send-email-rppt@linux.ibm.com [rppt@linux.ibm.com: don't panic if the allocation in sparse_buffer_init fails] Link: http://lkml.kernel.org/r/20190131074018.GD28876@rapoport-lnx [akpm@linux-foundation.org: fix xtensa printk warning] Link: http://lkml.kernel.org/r/1548057848-15136-20-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Signed-off-by: Anders Roxell <anders.roxell@linaro.org> Reviewed-by: Guo Ren <ren_guo@c-sky.com> [c-sky] Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> [s390] Reviewed-by: Juergen Gross <jgross@suse.com> [Xen] Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org> [m68k] Acked-by: Max Filippov <jcmvbkbc@gmail.com> [xtensa] Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Rob Herring <robh@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-12 06:30:31 +00:00
if (!cpu_to_phys_id)
panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
__func__, nr_cpu_ids * sizeof(u32), __alignof__(u32));
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
int len;
DBG(" * %pOF...\n", dn);
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
if (intserv) {
DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n",
(len / sizeof(int)));
} else {
DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n");
intserv = of_get_property(dn, "reg", &len);
if (!intserv) {
cpu_be = cpu_to_be32(cpu);
/* XXX: what is this? uninitialized?? */
intserv = &cpu_be; /* assume logical == phys */
len = 4;
}
}
nthreads = len / sizeof(int);
bool avail = of_device_is_available(dn);
if (!avail)
avail = !of_property_match_string(dn,
"enable-method", "spin-table");
if (boot_core_hwid >= 0) {
if (cpu == 0) {
pr_info("Skipping CPU node %pOF to allow for boot core.\n", dn);
cpu = nthreads;
continue;
}
if (be32_to_cpu(intserv[0]) == boot_core_hwid) {
pr_info("Renumbered boot core %pOF to logical 0\n", dn);
assign_threads(0, nthreads, avail, intserv);
of_node_put(dn);
break;
}
} else if (cpu >= nr_cpu_ids) {
of_node_put(dn);
break;
}
if (cpu < nr_cpu_ids)
cpu = assign_threads(cpu, nthreads, avail, intserv);
}
/* If no SMT supported, nthreads is forced to 1 */
if (!cpu_has_feature(CPU_FTR_SMT)) {
DBG(" SMT disabled ! nthreads forced to 1\n");
nthreads = 1;
}
#ifdef CONFIG_PPC64
/*
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
if (firmware_has_feature(FW_FEATURE_LPAR) &&
(dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
const __be32 *ireg;
num_addr_cell = of_n_addr_cells(dn);
num_size_cell = of_n_size_cells(dn);
ireg = of_get_property(dn, "ibm,lrdr-capacity", NULL);
if (!ireg)
goto out;
maxcpus = be32_to_cpup(ireg + num_addr_cell + num_size_cell);
/* Double maxcpus for processors which have SMT capability */
if (cpu_has_feature(CPU_FTR_SMT))
maxcpus *= nthreads;
if (maxcpus > nr_cpu_ids) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
"operating system maximum is %u.\n",
maxcpus, nr_cpu_ids);
maxcpus = nr_cpu_ids;
} else
printk(KERN_INFO "Partition configured for %d cpus.\n",
maxcpus);
for (cpu = 0; cpu < maxcpus; cpu++)
set_cpu_possible(cpu, true);
out:
of_node_put(dn);
}
vdso_data->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
/* Initialize CPU <=> thread mapping/
*
* WARNING: We assume that the number of threads is the same for
* every CPU in the system. If that is not the case, then some code
* here will have to be reworked
*/
cpu_init_thread_core_maps(nthreads);
/* Now that possible cpus are set, set nr_cpu_ids for later use */
setup_nr_cpu_ids();
free_unused_pacas();
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_PCSPKR_PLATFORM
static __init int add_pcspkr(void)
{
struct device_node *np;
struct platform_device *pd;
int ret;
np = of_find_compatible_node(NULL, NULL, "pnpPNP,100");
of_node_put(np);
if (!np)
return -ENODEV;
pd = platform_device_alloc("pcspkr", -1);
if (!pd)
return -ENOMEM;
ret = platform_device_add(pd);
if (ret)
platform_device_put(pd);
return ret;
}
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
static char ppc_hw_desc_buf[128] __initdata;
struct seq_buf ppc_hw_desc __initdata = {
.buffer = ppc_hw_desc_buf,
.size = sizeof(ppc_hw_desc_buf),
.len = 0,
};
static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
unsigned int i;
/*
* Iterate all ppc_md structures until we find the proper
* one for the current machine type
*/
DBG("Probing machine type ...\n");
/*
* Check ppc_md is empty, if not we have a bug, ie, we setup an
* entry before probe_machine() which will be overwritten
*/
for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) {
if (((void **)&ppc_md)[i]) {
printk(KERN_ERR "Entry %d in ppc_md non empty before"
" machine probe !\n", i);
}
}
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
DBG(" %s ...\n", machine_id->name);
if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
continue;
if (machine_id->compatibles && !of_machine_compatible_match(machine_id->compatibles))
continue;
memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
if (ppc_md.probe && !ppc_md.probe())
continue;
DBG(" %s match !\n", machine_id->name);
break;
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
pr_err("No suitable machine description found !\n");
for (;;);
}
// Append the machine name to other info we've gathered
seq_buf_puts(&ppc_hw_desc, ppc_md.name);
// Set the generic hardware description shown in oopses
dump_stack_set_arch_desc(ppc_hw_desc.buffer);
pr_info("Hardware name: %s\n", ppc_hw_desc.buffer);
}
/* Match a class of boards, not a specific device configuration. */
int check_legacy_ioport(unsigned long base_port)
{
struct device_node *parent, *np = NULL;
int ret = -ENODEV;
switch(base_port) {
case I8042_DATA_REG:
if (!(np = of_find_compatible_node(NULL, NULL, "pnpPNP,303")))
np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");
if (np) {
parent = of_get_parent(np);
of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0);
if (!of_i8042_kbd_irq)
of_i8042_kbd_irq = 1;
of_i8042_aux_irq = irq_of_parse_and_map(parent, 1);
if (!of_i8042_aux_irq)
of_i8042_aux_irq = 12;
of_node_put(np);
np = parent;
break;
}
np = of_find_node_by_type(NULL, "8042");
/* Pegasos has no device_type on its 8042 node, look for the
* name instead */
if (!np)
np = of_find_node_by_name(NULL, "8042");
if (np) {
of_i8042_kbd_irq = 1;
of_i8042_aux_irq = 12;
}
break;
case FDC_BASE: /* FDC1 */
np = of_find_node_by_type(NULL, "fdc");
break;
default:
/* ipmi is supposed to fail here */
break;
}
if (!np)
return ret;
parent = of_get_parent(np);
if (parent) {
if (of_node_is_type(parent, "isa"))
ret = 0;
of_node_put(parent);
}
of_node_put(np);
return ret;
}
EXPORT_SYMBOL(check_legacy_ioport);
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
/*
* Panic notifiers setup
*
* We have 3 notifiers for powerpc, each one from a different "nature":
*
* - ppc_panic_fadump_handler() is a hypervisor notifier, which hard-disables
* IRQs and deal with the Firmware-Assisted dump, when it is configured;
* should run early in the panic path.
*
* - dump_kernel_offset() is an informative notifier, just showing the KASLR
* offset if we have RANDOMIZE_BASE set.
*
* - ppc_panic_platform_handler() is a low-level handler that's registered
* only if the platform wishes to perform final actions in the panic path,
* hence it should run late and might not even return. Currently, only
* pseries and ps3 platforms register callbacks.
*/
static int ppc_panic_fadump_handler(struct notifier_block *this,
unsigned long event, void *ptr)
{
/*
* panic does a local_irq_disable, but we really
* want interrupts to be hard disabled.
*/
hard_irq_disable();
/*
* If firmware-assisted dump has been registered then trigger
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
* its callback and let the firmware handles everything else.
*/
crash_fadump(NULL, ptr);
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
return NOTIFY_DONE;
}
static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
void *p)
{
pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
kaslr_offset(), KERNELBASE);
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
return NOTIFY_DONE;
}
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
static int ppc_panic_platform_handler(struct notifier_block *this,
unsigned long event, void *ptr)
{
/*
* This handler is only registered if we have a panic callback
* on ppc_md, hence NULL check is not needed.
* Also, it may not return, so it runs really late on panic path.
*/
ppc_md.panic(ptr);
return NOTIFY_DONE;
}
static struct notifier_block ppc_fadump_block = {
.notifier_call = ppc_panic_fadump_handler,
.priority = INT_MAX, /* run early, to notify the firmware ASAP */
};
static struct notifier_block kernel_offset_notifier = {
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
.notifier_call = dump_kernel_offset,
};
static struct notifier_block ppc_panic_block = {
.notifier_call = ppc_panic_platform_handler,
.priority = INT_MIN, /* may not return; must be done last */
};
void __init setup_panic(void)
{
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
/* Hard-disables IRQs + deal with FW-assisted dump (fadump) */
atomic_notifier_chain_register(&panic_notifier_list,
&ppc_fadump_block);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
atomic_notifier_chain_register(&panic_notifier_list,
&kernel_offset_notifier);
powerpc/setup: Refactor/untangle panic notifiers The panic notifiers infrastructure is a bit limited in the scope of the callbacks - basically every kind of functionality is dropped in a list that runs in the same point during the kernel panic path. This is not really on par with the complexities and particularities of architecture / hypervisors' needs, and a refactor is ongoing. As part of this refactor, it was observed that powerpc has 2 notifiers, with mixed goals: one is just a KASLR offset dumper, whereas the other aims to hard-disable IRQs (necessary on panic path), warn firmware of the panic event (fadump) and run low-level platform-specific machinery that might stop kernel execution and never come back. Clearly, the 2nd notifier has opposed goals: disable IRQs / fadump should run earlier while low-level platform actions should run late since it might not even return. Hence, this patch decouples the notifiers splitting them in three: - First one is responsible for hard-disable IRQs and fadump, should run early; - The kernel KASLR offset dumper is really an informative notifier, harmless and may run at any moment in the panic path; - The last notifier should run last, since it aims to perform low-level actions for specific platforms, and might never return. It is also only registered for 2 platforms, pseries and ps3. The patch better documents the notifiers and clears the code too, also removing a useless header. Currently no functionality change should be observed, but after the planned panic refactor we should expect more panic reliability with this patch. Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com> Reviewed-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220427224924.592546-9-gpiccoli@igalia.com
2022-04-27 22:49:02 +00:00
/* Low-level platform-specific routines that should run on panic */
if (ppc_md.panic)
atomic_notifier_chain_register(&panic_notifier_list,
&ppc_panic_block);
}
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
* checks that the cache coherency setting of the kernel matches the setting
* left by the firmware, as indicated in the device tree. Since a mismatch
* will eventually result in DMA failures, we print * and error and call
* BUG() in that case.
*/
#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
static int __init check_cache_coherency(void)
{
struct device_node *np;
const void *prop;
bool devtree_coherency;
np = of_find_node_by_path("/");
prop = of_get_property(np, "coherency-off", NULL);
of_node_put(np);
devtree_coherency = prop ? false : true;
if (devtree_coherency != KERNEL_COHERENCY) {
printk(KERN_ERR
"kernel coherency:%s != device tree_coherency:%s\n",
KERNEL_COHERENCY ? "on" : "off",
devtree_coherency ? "on" : "off");
BUG();
}
return 0;
}
late_initcall(check_cache_coherency);
#endif /* CONFIG_CHECK_CACHE_COHERENCY */
void ppc_printk_progress(char *s, unsigned short hex)
{
pr_info("%s\n", s);
}
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
pr_info("phys_mem_size = 0x%llx\n",
(unsigned long long)memblock_phys_mem_size());
pr_info("dcache_bsize = 0x%x\n", dcache_bsize);
pr_info("icache_bsize = 0x%x\n", icache_bsize);
pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
pr_info(" possible = 0x%016lx\n",
(unsigned long)CPU_FTRS_POSSIBLE);
pr_info(" always = 0x%016lx\n",
(unsigned long)CPU_FTRS_ALWAYS);
pr_info("cpu_user_features = 0x%08x 0x%08x\n",
cur_cpu_spec->cpu_user_features,
cur_cpu_spec->cpu_user_features2);
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#ifdef CONFIG_PPC_BOOK3S
pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
pr_info("IO start = 0x%lx\n", KERN_IO_START);
pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
#endif
#endif
if (!early_radix_enabled())
print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
(unsigned long long)PHYSICAL_START);
pr_info("-----------------------------------------------------\n");
}
#ifdef CONFIG_SMP
static void __init smp_setup_pacas(void)
{
int cpu;
for_each_possible_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
allocate_paca(cpu);
set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
}
memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32));
cpu_to_phys_id = NULL;
}
#endif
/*
* Called into from start_kernel this initializes memblock, which is used
* to manage page allocation until mem_init is called.
*/
void __init setup_arch(char **cmdline_p)
{
kasan_init();
*cmdline_p = boot_command_line;
/* Set a half-reasonable default so udelay does something sensible */
loops_per_jiffy = 500000000 / HZ;
/* Unflatten the device-tree passed by prom_init or kexec */
unflatten_device_tree();
/*
* Initialize cache line/block info from device-tree (on ppc64) or
* just cputable (on ppc32).
*/
initialize_cache_info();
/* Initialize RTAS if available. */
rtas_initialize();
/* Check if we have an initrd provided via the device-tree. */
check_for_initrd();
/* Probe the machine type, establish ppc_md. */
probe_machine();
/* Setup panic notifier if requested by the platform. */
setup_panic();
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
*/
setup_power_save();
/* Discover standard serial ports. */
find_legacy_serial_ports();
/* Register early console with the printk subsystem. */
register_early_udbg_console();
/* Setup the various CPU maps based on the device-tree. */
smp_setup_cpu_maps();
/* Initialize xmon. */
xmon_setup();
/* Check the SMT related command line arguments (ppc64). */
check_smt_enabled();
/* Parse memory topology */
mem_topology_setup();
powerpc/mm: Fix boot crash with FLATMEM Erhard reported that his G5 was crashing with v6.6-rc kernels: mpic: Setting up HT PICs workarounds for U3/U4 BUG: Unable to handle kernel data access at 0xfeffbb62ffec65fe Faulting instruction address: 0xc00000000005dc40 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G T 6.6.0-rc3-PMacGS #1 Hardware name: PowerMac11,2 PPC970MP 0x440101 PowerMac NIP: c00000000005dc40 LR: c000000000066660 CTR: c000000000007730 REGS: c0000000022bf510 TRAP: 0380 Tainted: G T (6.6.0-rc3-PMacGS) MSR: 9000000000001032 <SF,HV,ME,IR,DR,RI> CR: 44004242 XER: 00000000 IRQMASK: 3 GPR00: 0000000000000000 c0000000022bf7b0 c0000000010c0b00 00000000000001ac GPR04: 0000000003c80000 0000000000000300 c0000000f20001ae 0000000000000300 GPR08: 0000000000000006 feffbb62ffec65ff 0000000000000001 0000000000000000 GPR12: 9000000000001032 c000000002362000 c000000000f76b80 000000000349ecd8 GPR16: 0000000002367ba8 0000000002367f08 0000000000000006 0000000000000000 GPR20: 00000000000001ac c000000000f6f920 c0000000022cd985 000000000000000c GPR24: 0000000000000300 00000003b0a3691d c0003e008030000e 0000000000000000 GPR28: c00000000000000c c0000000f20001ee feffbb62ffec65fe 00000000000001ac NIP hash_page_do_lazy_icache+0x50/0x100 LR __hash_page_4K+0x420/0x590 Call Trace: hash_page_mm+0x364/0x6f0 do_hash_fault+0x114/0x2b0 data_access_common_virt+0x198/0x1f0 --- interrupt: 300 at mpic_init+0x4bc/0x10c4 NIP: c000000002020a5c LR: c000000002020a04 CTR: 0000000000000000 REGS: c0000000022bf9f0 TRAP: 0300 Tainted: G T (6.6.0-rc3-PMacGS) MSR: 9000000000001032 <SF,HV,ME,IR,DR,RI> CR: 24004248 XER: 00000000 DAR: c0003e008030000e DSISR: 40000000 IRQMASK: 1 ... NIP mpic_init+0x4bc/0x10c4 LR mpic_init+0x464/0x10c4 --- interrupt: 300 pmac_setup_one_mpic+0x258/0x2dc pmac_pic_init+0x28c/0x3d8 init_IRQ+0x90/0x140 start_kernel+0x57c/0x78c start_here_common+0x1c/0x20 A bisect pointed to the breakage beginning with commit 9fee28baa601 ("powerpc: implement the new page table range API"). Analysis of the oops pointed to a struct page with a corrupted compound_head being loaded via page_folio() -> _compound_head() in hash_page_do_lazy_icache(). The access by the mpic code is to an MMIO address, so the expectation is that the struct page for that address would be initialised by init_unavailable_range(), as pointed out by Aneesh. Instrumentation showed that was not the case, which eventually lead to the realisation that pfn_valid() was returning false for that address, causing the struct page to not be initialised. Because the system is using FLATMEM, the version of pfn_valid() in memory_model.h is used: static inline int pfn_valid(unsigned long pfn) { ... return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr; } Which relies on max_mapnr being initialised. Early in boot max_mapnr is zero meaning no PFNs are valid. max_mapnr is initialised in mem_init() called via: start_kernel() mm_core_init() # init/main.c:928 mem_init() But that is too late for the usage in init_unavailable_range() called via: start_kernel() setup_arch() # init/main.c:893 paging_init() free_area_init() init_unavailable_range() Although max_mapnr is currently set in mem_init(), the value is actually already available much earlier, as soon as mem_topology_setup() has completed, which is also before paging_init() is called. So move the initialisation there, which causes paging_init() to correctly initialise the struct page and fixes the bug. This bug seems to have been lurking for years, but went unnoticed because the pre-folio code was inspecting the uninitialised page->flags but not dereferencing it. Thanks to Erhard and Aneesh for help debugging. Reported-by: Erhard Furtner <erhard_f@mailbox.org> Closes: https://lore.kernel.org/all/20230929132750.3cd98452@yea/ Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20231023112500.1550208-1-mpe@ellerman.id.au
2023-10-23 11:25:00 +00:00
/* Set max_mapnr before paging_init() */
set_max_mapnr(max_pfn);
/*
* Release secondary cpus out of their spinloops at 0x60 now that
* we can map physical -> logical CPU ids.
*
* Freescale Book3e parts spin in a loop provided by firmware,
* so smp_release_cpus() does nothing for them.
*/
#ifdef CONFIG_SMP
smp_setup_pacas();
/* On BookE, setup per-core TLB data structures. */
setup_tlb_core_data();
#endif
/* Print various info about the machine that has been gathered so far. */
print_system_info();
klp_init_thread_info(&init_task);
setup_initial_init_mm(_stext, _etext, _edata, _end);
/* sched_init() does the mmgrab(&init_mm) for the primary CPU */
VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
inc_mm_active_cpus(&init_mm);
mm_iommu_init(&init_mm);
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
mce_init();
powerpc/64: Set up a kernel stack for secondaries before cpu_restore() Currently in generic_secondary_smp_init(), cur_cpu_spec->cpu_restore() is called before a stack has been set up in r1. This was previously fine as the cpu_restore() functions were implemented in assembly and did not use a stack. However commit 5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features") used __restore_cpu_cpufeatures() as the cpu_restore() function for a device-tree features based cputable entry. This is a C function and hence uses a stack in r1. generic_secondary_smp_init() is entered on the secondary cpus via the primary cpu using the OPAL call opal_start_cpu(). In OPAL, each hardware thread has its own stack. The OPAL call is ran in the primary's hardware thread. During the call, a job is scheduled on a secondary cpu that will start executing at the address of generic_secondary_smp_init(). Hence the value that will be left in r1 when the secondary cpu enters the kernel is part of that secondary cpu's individual OPAL stack. This means that __restore_cpu_cpufeatures() will write to that OPAL stack. This is not horribly bad as each hardware thread has its own stack and the call that enters the kernel from OPAL never returns, but it is still wrong and should be corrected. Create the temp kernel stack before calling cpu_restore(). As noted by mpe, for a kexec boot, the secondary CPUs are released from the spin loop at address 0x60 by smp_release_cpus() and then jump to generic_secondary_smp_init(). The call to smp_release_cpus() is in setup_arch(), and it comes before the call to emergency_stack_init(). emergency_stack_init() allocates an emergency stack in the PACA for each CPU. This address in the PACA is what is used to set up the temp kernel stack in generic_secondary_smp_init(). Move releasing the secondary CPUs to after the PACAs have been allocated an emergency stack, otherwise the PACA stack pointer will contain garbage and hence the temp kernel stack created from it will be broken. Fixes: 5a61ef74f269 ("powerpc/64s: Support new device tree binding for discovering CPU features") Signed-off-by: Jordan Niethe <jniethe5@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201014072837.24539-1-jniethe5@gmail.com
2020-10-14 07:28:36 +00:00
smp_release_cpus();
initmem_init();
/*
* Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must
* be called after initmem_init(), so that pageblock_order is initialised.
*/
kvm_cma_reserve();
gigantic_hugetlb_cma_reserve();
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
if (ppc_md.setup_arch)
ppc_md.setup_arch();
setup_barrier_nospec();
setup_spectre_v2();
paging_init();
/* Initialize the MMU context management stuff. */
mmu_context_init();
/* Interrupt code needs to be 64K-aligned. */
if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
(unsigned long)_stext);
}