// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,	  /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,	  /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}

static inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3);
}

static inline u64 get_debug_extn_cfg(void)
{
	u64 val;

	rdmsrl(MSR_AMD_DBG_EXTN_CFG, val);
	return val;
}

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * Current BRS implementation does not support branch type or privilege
 * level filtering. Therefore, this function simply enforces these
 * limitations. No need for a br_sel_map. Software filtering is not
 * supported because it would not correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * Due to interrupt holding, BRS is not recommended in
	 * counting mode.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period is even
	 * more loose:
	 *
	 * With retired taken branch:
	 *   Effective P = P + 16 + X
	 * With any other event:
	 *   Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the occurrences of the event while BRS is
	 * capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;

	/*
	 * The kernel subtracts BRS depth from period, so it must
	 * be big enough.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}
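/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a user-space consumer reaches amd_brs_hw_config() through the regular
 * branch-stack sampling ABI. The raw event encoding below is a placeholder;
 * the constraints enforced above are a sampling event on retired taken
 * branches, a fixed period larger than the BRS depth (16), and "any branch"
 * filtering, roughly:
 *
 *	struct perf_event_attr attr = {
 *		.type			= PERF_TYPE_RAW,
 *		.config			= retired_taken_branches_encoding, // hypothetical
 *		.sample_period		= 100000,	// must be > x86_pmu.lbr_nr
 *		.sample_type		= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
 *		.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY,
 *		// .freq must stay 0: frequency mode is rejected above
 *	};
 */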
/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff: index of next entry to write so top-of-stack is one off
	 * if BRS is full then msroff is set back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}

/*
 * make sure we have a sane BRS offset to begin with
 * especially with kexec
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * When coming in on interrupt and BRS is full, then hw will have
	 * already stopped BRS, no need to issue wrmsr again
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return 0;

	if (!(type & plm_u) && !kernel_ip(to))
		return 0;

	return 1;
}

/*
 * Caller must ensure amd_brs_inuse() is true before calling
 * return:
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * BRS event forced on PMC0,
	 * so check if there is an event.
	 * It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to long latency PMU irq
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check [0-x86_pmu.lbr_nr] */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msr.off points to next entry to be written
	 * tos = most recent entry index = msr.off - 1
	 * BRS register buffer saturates, so we know we have
	 * start < tos and that we have to read from start to tos
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;

	/*
	 * BRS is only one pass (saturation) from MSROFF to depth-1
	 * MSROFF wraps to zero when buffer is full
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization
		 */
		to = (u64)(((s64)to << shift) >> shift);

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br + nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}
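/*
 * Worked example (editorial illustration, not part of the original file):
 * with the 16-deep BRS, msroff == 0 together with vb == 1 means the buffer
 * saturated and wrapped, so amd_brs_get_tos() returns 15 and the drain loop
 * above reads entries 15 down to 0; with msroff == 5, only entries 4 down
 * to 0 are read. For the sign-extension step, assuming x86_virt_bits == 48
 * (shift == 16), a raw SAMP_BR_TO value of 0x0000ffff81000000 becomes the
 * canonical kernel address 0xffffffff81000000 before symbolization.
 */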
/*
 * Poison most recent entry to prevent reuse by next task
 * required because BRS entries are not tagged by PID
 */
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from previous user
 * are left in the BRS.
 *
 * On ctxswin, sched_in = true, called after the PMU has started
 * On ctxswout, sched_in = false, called before the PMU is stopped
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from previous BRS user on that CPU, so we poison the buffer as
	 * a faster way compared to resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * called from ACPI processor_idle.c or acpi_pad.c
 * with interrupts disabled
 */
void perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * on mwait in, we may end up in non C0 state.
	 * we must disable branch sampling to avoid holding the NMI
	 * for too long. We disable it in hardware but we
	 * keep the state in cpuc, so we can re-enable.
	 *
	 * The hardware will deliver the NMI if needed when brsmen cleared
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
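/*
 * Editorial sketch (not part of the original file): the idle code is not
 * expected to call perf_amd_brs_lopwr_cb() directly; it goes through the
 * perf_lopwr_cb static call updated in amd_brs_lopwr_init(), roughly along
 * these lines, with interrupts disabled:
 *
 *	perf_lopwr_cb(true);	// entering non-C0: clears brsmen if BRS is active
 *	...			// mwait / low-power idle
 *	perf_lopwr_cb(false);	// back to C0: re-arms brsmen
 *
 * The wrapper name and exact call sites are assumptions inferred from the
 * "called from ACPI processor_idle.c or acpi_pad.c" comment above.
 */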