x86/cpu: Detect real BSP on crash kernels

When a kdump kernel is started from a crashing CPU then there is no
guarantee that this CPU is the real boot CPU (BSP). If the kdump kernel
tries to online the BSP then the INIT sequence will reset the machine.

There is a command line option to prevent this, but in case of nested kdump
kernels this is wrong.

But that command line option is not required at all because the real
BSP is enumerated as the first CPU by firmware. Support for the only
known system which was different (Voyager) got removed long ago.

Detect whether the boot CPU APIC ID is the first APIC ID enumerated by
the firmware. If the first enumerated APIC ID does not match the boot
CPU APIC ID, it belongs to the real BSP, so skip registering it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Link: https://lore.kernel.org/r/20240213210252.348542071@linutronix.de
This commit is contained in:
Thomas Gleixner 2024-02-13 22:05:54 +01:00
parent 7c0edad364
commit 5c5682b9f8
3 changed files with 66 additions and 57 deletions

View File

@ -191,9 +191,7 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
CPU is enough for kdump kernel to dump vmcore on most systems.
However, you can also specify nr_cpus=X to enable multiple processors
in kdump kernel. In this case, "disable_cpu_apicid=" is needed to
tell kdump kernel which cpu is 1st kernel's BSP. Please refer to
admin-guide/kernel-parameters.txt for more details.
in kdump kernel.
With CONFIG_SMP=n, the above things are not related.
@ -454,8 +452,7 @@ Notes on loading the dump-capture kernel:
to use multi-thread programs with it, such as parallel dump feature of
makedumpfile. Otherwise, the multi-thread program may have a great
performance degradation. To enable multi-cpu support, you should bring up an
SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
options while loading it.
SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it.
* For s390x there are two kdump modes: If an ELF header is specified with
the elfcorehdr= kernel parameter, it is used by the kdump kernel as it

View File

@ -1100,15 +1100,6 @@
Disable TLBIE instruction. Currently does not work
with KVM, with HASH MMU, or with coherent accelerators.
disable_cpu_apicid= [X86,APIC,SMP]
Format: <int>
The number of initial APIC ID for the
corresponding CPU to be disabled at boot,
mostly used for the kdump 2nd kernel to
disable BSP to wake up multiple CPUs without
causing system reset or hang due to sending
INIT from AP to BSP.
disable_ddw [PPC/PSERIES]
Disable Dynamic DMA Window support. Use this
to workaround buggy firmware.

View File

@ -32,18 +32,13 @@ static struct {
unsigned int nr_disabled_cpus;
unsigned int nr_rejected_cpus;
u32 boot_cpu_apic_id;
u32 real_bsp_apic_id;
} topo_info __read_mostly = {
.nr_assigned_cpus = 1,
.boot_cpu_apic_id = BAD_APICID,
.real_bsp_apic_id = BAD_APICID,
};
/*
* Processor to be disabled specified by kernel parameter
* disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
* avoid undefined behaviour caused by sending INIT from AP to BSP.
*/
static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID;
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return phys_id == (u64)cpuid_to_apicid[cpu];
@ -123,35 +118,41 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
cpu_mark_primary_thread(cpu, apic_id);
}
/**
* topology_register_apic - Register an APIC in early topology maps
* @apic_id: The APIC ID to set up
* @acpi_id: The ACPI ID associated to the APIC
* @present: True if the corresponding CPU is present
*/
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
/*
 * Decide whether @apic_id is the real BSP of a crashed kernel.
 *
 * Returns true when the caller must not register @apic_id, false
 * otherwise. Only the first APIC ID evaluated here can yield true: it is
 * recorded in topo_info.real_bsp_apic_id and every later invocation bails
 * out early.
 */
static __init bool check_for_real_bsp(u32 apic_id)
{
	const u32 boot_apic_id = topo_info.boot_cpu_apic_id;

	/* The first enumerated APIC ID has been evaluated already. */
	if (topo_info.real_bsp_apic_id != BAD_APICID)
		return false;

	/*
	 * There is no really good way to detect whether this is a kdump
	 * kernel. But except on the Voyager SMP monstrosity, which is no
	 * longer supported, the real BSP APIC ID is the first one which is
	 * enumerated by firmware. That allows detecting whether the boot
	 * CPU is the real BSP.
	 *
	 * The first APIC ID which is enumerated by firmware is detectable
	 * because the boot CPU APIC ID is registered before that without
	 * invoking this code.
	 */
	topo_info.real_bsp_apic_id = apic_id;

	/* The boot CPU was enumerated first, so it is the real BSP. */
	if (apic_id == boot_apic_id)
		return false;

	/*
	 * The boot CPU is not the real BSP. Do not register the APIC,
	 * because sending INIT to the real BSP would reset the whole
	 * system.
	 */
	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
		boot_apic_id, apic_id);
	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
	return true;
}
static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
int cpu;
if (apic_id >= MAX_LOCAL_APIC) {
pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
topo_info.nr_rejected_cpus++;
return;
}
if (disabled_cpu_apicid == apic_id) {
pr_info("Disabling CPU as requested via 'disable_cpu_apicid=0x%x'.\n", apic_id);
topo_info.nr_rejected_cpus++;
return;
}
/* CPU numbers exhausted? */
if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
topo_info.nr_rejected_cpus++;
return;
}
if (present) {
set_bit(apic_id, phys_cpu_present_map);
@ -171,6 +172,35 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
}
}
/**
 * topology_register_apic - Register an APIC in early topology maps
 * @apic_id:	The APIC ID to set up
 * @acpi_id:	The ACPI ID associated to the APIC
 * @present:	True if the corresponding CPU is present
 *
 * The APIC is handed to topo_register_apic() unless one of the checks
 * below rejects it. Every rejection is accounted in
 * topo_info.nr_rejected_cpus.
 */
void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
{
	/* APIC IDs at or above MAX_LOCAL_APIC cannot be handled. */
	if (apic_id >= MAX_LOCAL_APIC) {
		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
		goto reject;
	}

	/* Never register the real BSP when running as crash kernel. */
	if (check_for_real_bsp(apic_id))
		goto reject;

	/* CPU numbers exhausted? The boot CPU is exempt from the limit. */
	if (topo_info.nr_assigned_cpus >= nr_cpu_ids && apic_id != topo_info.boot_cpu_apic_id) {
		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
		goto reject;
	}

	topo_register_apic(apic_id, acpi_id, present);
	return;

reject:
	topo_info.nr_rejected_cpus++;
}
/**
* topology_register_boot_apic - Register the boot CPU APIC
* @apic_id: The APIC ID to set up
@ -182,7 +212,7 @@ void __init topology_register_boot_apic(u32 apic_id)
WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);
topo_info.boot_cpu_apic_id = apic_id;
topology_register_apic(apic_id, CPU_ACPIID_INVALID, true);
topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
}
#ifdef CONFIG_ACPI_HOTPLUG_CPU
@ -335,12 +365,3 @@ static int __init setup_possible_cpus(char *str)
}
early_param("possible_cpus", setup_possible_cpus);
#endif
/*
 * Early parameter handler for "disable_cpu_apicid=".
 *
 * Stores the parsed value in disabled_cpu_apicid. A missing argument or
 * a zero return from get_option() is reported as -EINVAL.
 */
static int __init apic_set_disabled_cpu_apicid(char *arg)
{
	if (arg && get_option(&arg, &disabled_cpu_apicid))
		return 0;

	return -EINVAL;
}
early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);