Merge branch 'topic/ppc-kvm' into next

Merge the topic branch we share with kvm-ppc, this brings in two xive
commits, one from Paul to rework HMI handling, and a minor cleanup to
drop an unused flag.
This commit is contained in:
Michael Ellerman 2018-01-21 22:43:43 +11:00
commit 5400fc229e
11 changed files with 206 additions and 78 deletions

View file

@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
static inline void wait_for_subcore_guest_exit(void) { }
static inline void wait_for_tb_resync(void) { }
#endif
struct pt_regs;
extern long hmi_handle_debugtrig(struct pt_regs *regs);
#endif /* __ASM_PPC64_HMI_H__ */

View file

@ -241,6 +241,7 @@
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
@ -330,6 +331,17 @@
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */
/* H_GET_CPU_CHARACTERISTICS return values */
#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10
@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
}
}
struct h_cpu_char_result {
u64 character;
u64 behaviour;
};
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */

View file

@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
}
static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
{
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
long rc;
rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
if (rc == H_SUCCESS) {
p->character = retbuf[0];
p->behaviour = retbuf[1];
}
return rc;
}
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */

View file

@ -431,8 +431,9 @@
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hardware m? error recovery */
#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */

View file

@ -9,6 +9,41 @@
#ifndef _ASM_POWERPC_XIVE_REGS_H
#define _ASM_POWERPC_XIVE_REGS_H
/*
* "magic" Event State Buffer (ESB) MMIO offsets.
*
* Each interrupt source has a 2-bit state machine called ESB
* which can be controlled by MMIO. It's made of 2 bits, P and
* Q. P indicates that an interrupt is pending (has been sent
* to a queue and is waiting for an EOI). Q indicates that the
* interrupt has been triggered while pending.
*
* This acts as a coalescing mechanism in order to guarantee
* that a given interrupt only occurs at most once in a queue.
*
* When doing an EOI, the Q bit will indicate if the interrupt
* needs to be re-triggered.
*
* The following offsets into the ESB MMIO allow to read or
* manipulate the PQ bits. They must be used with an 8-bytes
* load instruction. They all return the previous state of the
* interrupt (atomically).
*
* Additionally, some ESB pages support doing an EOI via a
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
#define XIVE_ESB_STORE_EOI 0x400 /* Store */
#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
#define XIVE_ESB_GET 0x800 /* Load */
#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
/*
* Thread Management (aka "TM") registers
*/

View file

@ -58,6 +58,9 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_EOI_FW 0x10
#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
/* Special flag set by KVM for excalation interrupts */
#define XIVE_IRQ_NO_EOI 0x80
#define XIVE_INVALID_CHIP_ID -1
/* A queue tracking structure in a CPU */
@ -72,41 +75,6 @@ struct xive_q {
atomic_t pending_count;
};
/*
* "magic" Event State Buffer (ESB) MMIO offsets.
*
* Each interrupt source has a 2-bit state machine called ESB
* which can be controlled by MMIO. It's made of 2 bits, P and
* Q. P indicates that an interrupt is pending (has been sent
* to a queue and is waiting for an EOI). Q indicates that the
* interrupt has been triggered while pending.
*
* This acts as a coalescing mechanism in order to guarantee
* that a given interrupt only occurs at most once in a queue.
*
* When doing an EOI, the Q bit will indicate if the interrupt
* needs to be re-triggered.
*
* The following offsets into the ESB MMIO allow to read or
* manipulate the PQ bits. They must be used with an 8-bytes
* load instruction. They all return the previous state of the
* interrupt (atomically).
*
* Additionally, some ESB pages support doing an EOI via a
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
#define XIVE_ESB_STORE_EOI 0x400 /* Store */
#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
#define XIVE_ESB_GET 0x800 /* Load */
#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
/* Global enable flags for the XIVE support */
extern bool __xive_enabled;

View file

@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled;
}
long hmi_exception_realmode(struct pt_regs *regs)
/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
DTRIG_UNKNOWN,
DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
} hmer_debug_trig_function;
static int init_debug_trig_function(void)
{
int pvr;
struct device_node *cpun;
struct property *prop = NULL;
const char *str;
/* First look in the device tree */
preempt_disable();
cpun = of_get_cpu_node(smp_processor_id(), NULL);
if (cpun) {
of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
prop, str) {
if (strcmp(str, "bit17-vector-ci-load") == 0)
hmer_debug_trig_function = DTRIG_VECTOR_CI;
else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
of_node_put(cpun);
}
preempt_enable();
/* If we found the property, don't look at PVR */
if (prop)
goto out;
pvr = mfspr(SPRN_PVR);
/* Check for POWER9 Nimbus (scale-out) */
if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
/* DD2.2 and later */
if ((pvr & 0xfff) >= 0x202)
hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
/* DD2.0 and DD2.1 - used for vector CI load emulation */
else if ((pvr & 0xfff) >= 0x200)
hmer_debug_trig_function = DTRIG_VECTOR_CI;
}
out:
switch (hmer_debug_trig_function) {
case DTRIG_VECTOR_CI:
pr_debug("HMI debug trigger used for vector CI load\n");
break;
case DTRIG_SUSPEND_ESCAPE:
pr_debug("HMI debug trigger used for TM suspend escape\n");
break;
default:
break;
}
return 0;
}
__initcall(init_debug_trig_function);
/*
* Handle HMIs that occur as a result of a debug trigger.
* Return values:
* -1 means this is not a HMI cause that we know about
* 0 means no further handling is required
* 1 means further handling is required
*/
long hmi_handle_debugtrig(struct pt_regs *regs)
{
unsigned long hmer = mfspr(SPRN_HMER);
long ret = 0;
/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
if (!((hmer & HMER_DEBUG_TRIG)
&& hmer_debug_trig_function != DTRIG_UNKNOWN))
return -1;
hmer &= ~HMER_DEBUG_TRIG;
/* HMER is a write-AND register */
mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
switch (hmer_debug_trig_function) {
case DTRIG_VECTOR_CI:
/*
* Now to avoid problems with soft-disable we
* only do the emulation if we are coming from
* host user space
*/
if (regs && user_mode(regs))
ret = local_paca->hmi_p9_special_emu = 1;
break;
default:
break;
}
/*
* See if any other HMI causes remain to be handled
*/
if (hmer & mfspr(SPRN_HMEER))
return -1;
return ret;
}
/*
* Return values:
*/
long hmi_exception_realmode(struct pt_regs *regs)
{
int ret;
__this_cpu_inc(irq_stat.hmi_exceptions);
#ifdef CONFIG_PPC_BOOK3S_64
/* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
if (pvr_version_is(PVR_POWER9)) {
unsigned long hmer = mfspr(SPRN_HMER);
/* Do we have the debug bit set */
if (hmer & PPC_BIT(17)) {
hmer &= ~PPC_BIT(17);
mtspr(SPRN_HMER, hmer);
/*
* Now to avoid problems with soft-disable we
* only do the emulation if we are coming from
* user space
*/
if (user_mode(regs))
local_paca->hmi_p9_special_emu = 1;
/*
* Don't bother going to OPAL if that's the
* only relevant bit.
*/
if (!(hmer & mfspr(SPRN_HMEER)))
return local_paca->hmi_p9_special_emu;
}
}
#endif /* CONFIG_PPC_BOOK3S_64 */
ret = hmi_handle_debugtrig(regs);
if (ret >= 0)
return ret;
wait_for_subcore_guest_exit();

View file

@ -266,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
* secondary threads to proceed.
* - All secondary threads will eventually call opal hmi handler on
* their exit path.
*
* Returns 1 if the timebase offset should be applied, 0 if not.
*/
long kvmppc_realmode_hmi_handler(void)
{
int ptid = local_paca->kvm_hstate.ptid;
bool resync_req;
/* This is only called on primary thread. */
BUG_ON(ptid != 0);
__this_cpu_inc(irq_stat.hmi_exceptions);
if (hmi_handle_debugtrig(NULL) >= 0)
return 1;
/*
* By now primary thread has already completed guest->host
* partition switch but haven't signaled secondaries yet.

View file

@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
static int global_invalidates(struct kvm *kvm)
{
int global;
int cpu;
@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte_r, pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/*
* The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until
@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
if (kvm_is_radix(kvm))
return H_FUNCTION;
global = global_invalidates(kvm, 0);
global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
true);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/* Don't lose R/C bit updates done by hardware */
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);

View file

@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
bne 27f
bl kvmppc_realmode_hmi_handler
nop
cmpdi r3, 0
li r12, BOOK3S_INTERRUPT_HMI
/*
* At this point kvmppc_realmode_hmi_handler would have resync-ed
* the TB. Hence it is not required to subtract guest timebase
* offset from timebase. So, skip it.
* At this point kvmppc_realmode_hmi_handler may have resync-ed
* the TB, and if it has, we must not subtract the guest timebase
* offset from the timebase. So, skip it.
*
* Also, do not call kvmppc_subcore_exit_guest() because it has
* been invoked as part of kvmppc_realmode_hmi_handler().
*/
b 30f
beq 30f
27:
/* Subtract timebase offset from timebase */

View file

@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
* EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest
*/
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
/*