x86/sev: Split up runtime #VC handler for correct state tracking

Split up the #VC handler code into a from-user and a from-kernel part.
This allows clean and correct state tracking, as the #VC handler needs
to enter NMI state when raised from kernel mode and plain IRQ state when
raised from user mode.

Fixes: 62441a1fb5 ("x86/sev-es: Correctly track IRQ states in runtime #VC handler")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210618115409.22735-3-joro@8bytes.org
commit be1a540886
parent d187f21733
Author:    Joerg Roedel <jroedel@suse.de>
Date:      2021-06-18 13:54:09 +02:00
Committer: Borislav Petkov <bp@suse.de>

3 files changed, 92 insertions(+), 91 deletions(-)
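Before the diffs, a condensed view of the new control flow: both entry points now funnel into one shared raw handler that only reports success or failure, while each entry point owns its mode-specific state transitions and failure policy. The sketch below is a standalone userspace analogy of that pattern, not kernel code; the printf() calls stand in for panic() and force_sig_fault(), and the 0xdead exit-code is invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Shared emulation work, no entry/exit state handling
 * (plays the role of vc_raw_handle_exception() in the diff below). */
static bool raw_handle(unsigned long error_code)
{
	return error_code != 0xdead;	/* pretend 0xdead is an unsupported exit-code */
}

/* From-kernel path: NMI-like state, fatal on failure
 * (plays the role of kernel_exc_vmm_communication()). */
static void handle_from_kernel(unsigned long error_code)
{
	/* irqentry_nmi_enter() would bracket this in the kernel */
	if (!raw_handle(error_code))
		printf("panic: unhandled #VC exception in kernel space\n");
	/* irqentry_nmi_exit() */
}

/* From-user path: plain IRQ state, kills the task on failure
 * (plays the role of user_exc_vmm_communication()). */
static void handle_from_user(unsigned long error_code)
{
	/* irqentry_enter_from_user_mode() */
	if (!raw_handle(error_code))
		printf("SIGBUS sent to current task\n");
	/* irqentry_exit_to_user_mode() */
}

int main(void)
{
	handle_from_kernel(0x72);	/* handled fine */
	handle_from_user(0xdead);	/* fails -> SIGBUS path, machine survives */
	return 0;
}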

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
 
 	movq	%rsp, %rdi		/* pt_regs pointer */
 
-	call	\cfunc
+	call	kernel_\cfunc
 
 	/*
 	 * No need to switch back to the IST stack. The current stack is either
@@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)
 
 	/* Switch to the regular task stack */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_body safe_stack_\cfunc, has_error_code=1
+	idtentry_body user_\cfunc, has_error_code=1
 
 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
  */
 #define DECLARE_IDTENTRY_VC(vector, func)				\
 	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
-	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
-	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+	__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code);	\
+	__visible noinstr void   user_##func(struct pt_regs *regs, unsigned long error_code)
 
 /**
  * DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
 	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
 
 /**
- * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
-				    which runs on a safe stack.
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+			       when raised from kernel mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_SAFE_STACK(func)				\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+#define DEFINE_IDTENTRY_VC_KERNEL(func)					\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
 
 /**
- * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
-			    which runs on the VC fall-back stack
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+			     when raised from user mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_IST(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
-
-/**
- * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
- * @func:	Function name of the entry point
- *
- * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
- */
-#define DEFINE_IDTENTRY_VC(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
 
 #else	/* CONFIG_X86_64 */
 

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -793,7 +793,7 @@ void __init sev_es_init_vc_handling(void)
 	sev_es_setup_play_dead();
 
 	/* Secondary CPUs use the runtime #VC handler */
-	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
 }
 
 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
@@ -1231,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
 	return ES_EXCEPTION;
 }
 
-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		noist_exc_debug(regs);
-	else
-		exc_debug(regs);
-}
-
 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 					 struct ghcb *ghcb,
 					 unsigned long exit_code)
@@ -1334,41 +1326,13 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
 	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
 }
 
-/*
- * Main #VC exception handler. It is called when the entry code was able to
- * switch off the IST to a safe kernel stack.
- *
- * With the current implementation it is always possible to switch to a safe
- * stack because #VC exceptions only happen at known places, like intercepted
- * instructions or accesses to MMIO areas/IO ports. They can also happen with
- * code instrumentation when the hypervisor intercepts #DB, but the critical
- * paths are forbidden to be instrumented, so #DB exceptions currently also
- * only happen in safe places.
- */
-DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
 {
-	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
 	enum es_result result;
 	struct ghcb *ghcb;
+	bool ret = true;
 
-	/*
-	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
-	 */
-	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
-		vc_handle_trap_db(regs);
-		return;
-	}
-
-	irq_state = irqentry_nmi_enter(regs);
-	instrumentation_begin();
-
-	/*
-	 * This is invoked through an interrupt gate, so IRQs are disabled. The
-	 * code below might walk page-tables for user or kernel addresses, so
-	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
-	 */
-
 	ghcb = __sev_get_ghcb(&state);
 
@@ -1388,15 +1352,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	case ES_UNSUPPORTED:
 		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_VMM_ERROR:
 		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_DECODE_FAILED:
 		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_EXCEPTION:
 		vc_forward_exception(&ctxt);
 		break;
@@ -1412,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		BUG();
 	}
 
-out:
-	instrumentation_end();
-	irqentry_nmi_exit(regs, irq_state);
-
-	return;
+	return ret;
+}
 
-fail:
-	if (user_mode(regs)) {
-		/*
-		 * Do not kill the machine if user-space triggered the
-		 * exception. Send SIGBUS instead and let user-space deal with
-		 * it.
-		 */
-		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
-	} else {
-		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
-			 result);
+static __always_inline bool vc_is_db(unsigned long error_code)
+{
+	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
+}
+
+/*
+ * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
+ * and will panic when an error happens.
+ */
+DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
+{
+	irqentry_state_t irq_state;
+
+	/*
+	 * With the current implementation it is always possible to switch to a
+	 * safe stack because #VC exceptions only happen at known places, like
+	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
+	 * also happen with code instrumentation when the hypervisor intercepts
+	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
+	 * exceptions currently also only happen in safe places.
+	 *
+	 * But keep this here in case the noinstr annotations are violated due
+	 * to bug elsewhere.
+	 */
+	if (unlikely(on_vc_fallback_stack(regs))) {
+		instrumentation_begin();
+		panic("Can't handle #VC exception from unsupported context\n");
+		instrumentation_end();
+	}
+
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		exc_debug(regs);
+		return;
+	}
+
+	irq_state = irqentry_nmi_enter(regs);
+	instrumentation_begin();
+
+	if (!vc_raw_handle_exception(regs, error_code)) {
 		/* Show some debug info */
 		show_regs(regs);
@@ -1440,23 +1435,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		panic("Returned from Terminate-Request to Hypervisor\n");
 	}
 
-	goto out;
-}
-
-/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
-DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
-{
-	instrumentation_begin();
-	panic("Can't handle #VC exception from unsupported context\n");
 	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
 }
 
-DEFINE_IDTENTRY_VC(exc_vmm_communication)
+/*
+ * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
+ * and will kill the current task with SIGBUS when an error happens.
+ */
+DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
 {
-	if (likely(!on_vc_fallback_stack(regs)))
-		safe_stack_exc_vmm_communication(regs, error_code);
-	else
-		ist_exc_vmm_communication(regs, error_code);
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		noist_exc_debug(regs);
+		return;
+	}
+
+	irqentry_enter_from_user_mode(regs);
+	instrumentation_begin();
+
+	if (!vc_raw_handle_exception(regs, error_code)) {
+		/*
+		 * Do not kill the machine if user-space triggered the
+		 * exception. Send SIGBUS instead and let user-space deal with
+		 * it.
+		 */
+		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+	}
+
+	instrumentation_end();
+	irqentry_exit_to_user_mode(regs);
 }
 
 bool __init handle_vc_boot_ghcb(struct pt_regs *regs)