linux-stable/arch/x86/xen/smp_hvm.c
Dongli Zhang eed0574432 xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32
The sched_clock() can be used very early since commit 857baa87b6
("sched/clock: Enable sched clock early"). In addition, with commit
38669ba205 ("x86/xen/time: Output xen sched_clock time from 0"), kdump
kernel in Xen HVM guest may panic at very early stage when accessing
&__this_cpu_read(xen_vcpu)->time as in below:

setup_arch()
 -> init_hypervisor_platform()
     -> x86_init.hyper.init_platform = xen_hvm_guest_init()
         -> xen_hvm_init_time_ops()
             -> xen_clocksource_read()
                 -> src = &__this_cpu_read(xen_vcpu)->time;

This is because Xen HVM supports at most MAX_VIRT_CPUS=32 'vcpu_info'
embedded inside 'shared_info' during early stage until xen_vcpu_setup() is
used to allocate/relocate 'vcpu_info' for boot cpu at arbitrary address.

However, when Xen HVM guest panic on vcpu >= 32, since
xen_vcpu_info_reset(0) would set per_cpu(xen_vcpu, cpu) = NULL when
vcpu >= 32, xen_clocksource_read() on vcpu >= 32 would panic.

This patch calls xen_hvm_init_time_ops() again later in
xen_hvm_smp_prepare_boot_cpu() after the 'vcpu_info' for boot vcpu is
registered when the boot vcpu is >= 32.

This issue can be reproduced on purpose via below command at the guest
side when kdump/kexec is enabled:

"taskset -c 33 echo c > /proc/sysrq-trigger"

The bugfix for PVM is not implemented due to the lack of testing
environment.

[boris: xen_hvm_init_time_ops() returns on errors instead of jumping to end]

Cc: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20220302164032.14569-3-dongli.zhang@oracle.com
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
2022-03-10 09:27:55 -06:00

92 lines
2 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/thread_info.h>
#include <asm/smp.h>
#include <xen/events.h>
#include "xen-ops.h"
#include "smp.h"
static void __init xen_hvm_smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();
/*
* Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info
* in xen_cpu_up_prepare_hvm().
*/
xen_vcpu_setup(0);
/*
* Called again in case the kernel boots on vcpu >= MAX_VIRT_CPUS.
* Refer to comments in xen_hvm_init_time_ops().
*/
xen_hvm_init_time_ops();
/*
* The alternative logic (which patches the unlock/lock) runs before
* the smp bootup up code is activated. Hence we need to set this up
* the core kernel is being patched. Otherwise we will have only
* modules patched but not core code.
*/
xen_init_spinlocks();
}
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
int cpu;
native_smp_prepare_cpus(max_cpus);
if (xen_have_vector_callback) {
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
}
for_each_possible_cpu(cpu) {
if (cpu == 0)
continue;
/* Set default vcpu_id to make sure that we don't use cpu-0's */
per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID;
}
}
#ifdef CONFIG_HOTPLUG_CPU
static void xen_hvm_cpu_die(unsigned int cpu)
{
if (common_cpu_die(cpu) == 0) {
if (xen_have_vector_callback) {
xen_smp_intr_free(cpu);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
}
}
}
#else
static void xen_hvm_cpu_die(unsigned int cpu)
{
BUG();
}
#endif
void __init xen_hvm_smp_init(void)
{
smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_cpus_done = xen_smp_cpus_done;
smp_ops.cpu_die = xen_hvm_cpu_die;
if (!xen_have_vector_callback) {
#ifdef CONFIG_PARAVIRT_SPINLOCKS
nopvspin = true;
#endif
return;
}
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
}