diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 768a9f977c00..3a5c568b1e89 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -113,10 +113,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. - * Don't release mce event now. In case if condition is not - * recovered we do guest exit and go back to linux host machine - * check handler. Hence we need make sure that current mce event - * is available for linux host to consume. + * Don't release mce event now. We will queue up the event so that + * we can log the MCE event info on host console. */ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) goto out; @@ -128,11 +126,12 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) out: /* - * If we have handled the error, then release the mce event because - * we will be delivering machine check to guest. + * We are now going enter guest either through machine check + * interrupt (for unhandled errors) or will continue from + * current HSRR0 (for handled errors) in guest. Hence + * queue up the event so that we can log it from host console later. */ - if (handled) - release_mce_event(); + machine_check_queue_event(); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd25ccc..868347ef09fd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2257,15 +2257,28 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* continue exiting from guest? */ + cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq mc_cont + /* + * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through + * machine check interrupt (set HSRR0 to 0x200). And for handled + * errors (no-fatal), just go back to guest execution with current + * HSRR0 instead of exiting guest. This new approach will inject + * machine check to guest for fatal error causing guest to crash. + * + * The old code used to return to host for unhandled errors which + * was causing guest to hang with soft lockups inside guest and + * makes it difficult to recover guest instance. + */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK ld r11, VCPU_MSR(r9) bl kvmppc_msr_interrupt - b fast_interrupt_c_return +2: b fast_interrupt_c_return /* * Check the reason we woke from nap, and take appropriate action.