From bc4000fdb009a531fc5b94339f8531eee64bc232 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 14 Mar 2023 10:00:39 -0700
Subject: [PATCH 1/5] perf/x86/intel: Add Granite Rapids

From the core PMU's perspective, Granite Rapids is similar to Sapphire
Rapids. The key differences are:

- It doesn't need the AUX event workaround for the mem load event
  (implemented in this patch).
- It supports Retire Latency (already implemented in commit
  c87a31093c70 ("perf/x86: Support Retire Latency")).
- The event list, which will be supported in the perf tool later.

Signed-off-by: Kan Liang
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20230314170041.2967712-1-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a3fb996a86a1..070cc4ef2672 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5469,6 +5469,15 @@ pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 	return x86_pmu.pebs ? attr->mode : 0;
 }
 
+static umode_t
+mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	if (attr == &event_attr_mem_ld_aux.attr.attr)
+		return x86_pmu.flags & PMU_FL_MEM_LOADS_AUX ? attr->mode : 0;
+
+	return pebs_is_visible(kobj, attr, i);
+}
+
 static umode_t
 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
@@ -5496,7 +5505,7 @@ static struct attribute_group group_events_td = {
 
 static struct attribute_group group_events_mem = {
 	.name       = "events",
-	.is_visible = pebs_is_visible,
+	.is_visible = mem_is_visible,
 };
 
 static struct attribute_group group_events_tsx = {
@@ -6486,6 +6495,10 @@ __init int intel_pmu_init(void)
 
 	case INTEL_FAM6_SAPPHIRERAPIDS_X:
 	case INTEL_FAM6_EMERALDRAPIDS_X:
+		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+		fallthrough;
+	case INTEL_FAM6_GRANITERAPIDS_X:
+	case INTEL_FAM6_GRANITERAPIDS_D:
 		pmem = true;
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -6502,7 +6515,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
-		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = spr_get_event_constraints;

From 5a796d5cb5d11f5aad4893a59f22715810769928 Mon Sep 17 00:00:00 2001
From: Kan Liang
Date: Tue, 14 Mar 2023 10:00:40 -0700
Subject: [PATCH 2/5] perf/x86/msr: Add Granite Rapids

As on Sapphire Rapids, the SMI_COUNT MSR is also supported on Granite
Rapids. Add the Granite Rapids models.
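
As background, the event added here counts System Management Interrupts.
A minimal user-space sketch (illustrative only, not part of this patch)
that reads the underlying SMI_COUNT MSR (0x34) directly, assuming the
msr driver is loaded and root privileges:

  /* Read MSR_SMI_COUNT (0x34) for CPU 0 via the msr driver. */
  #include <fcntl.h>
  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  #define MSR_SMI_COUNT 0x34

  int main(void)
  {
          uint64_t count;
          int fd = open("/dev/cpu/0/msr", O_RDONLY);

          if (fd < 0) {
                  perror("open /dev/cpu/0/msr");
                  return 1;
          }
          /* pread() at offset <MSR number> reads that MSR on that CPU. */
          if (pread(fd, &count, sizeof(count), MSR_SMI_COUNT) != sizeof(count)) {
                  perror("pread MSR_SMI_COUNT");
                  close(fd);
                  return 1;
          }
          printf("SMI count on CPU0: %" PRIu64 "\n", count);
          close(fd);
          return 0;
  }

With this patch, the same counter is available through the msr PMU
instead, e.g. "perf stat -e msr/smi/ -a -- sleep 1".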
Signed-off-by: Kan Liang
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20230314170041.2967712-2-kan.liang@linux.intel.com
---
 arch/x86/events/msr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index c65d8906cbcf..0feaaa571303 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -70,6 +70,8 @@ static bool test_intel(int idx, void *data)
 	case INTEL_FAM6_BROADWELL_X:
 	case INTEL_FAM6_SAPPHIRERAPIDS_X:
 	case INTEL_FAM6_EMERALDRAPIDS_X:
+	case INTEL_FAM6_GRANITERAPIDS_X:
+	case INTEL_FAM6_GRANITERAPIDS_D:
 
 	case INTEL_FAM6_ATOM_SILVERMONT:
 	case INTEL_FAM6_ATOM_SILVERMONT_D:

From 872d28001be56b205bd9b3f97cea1571a1bde317 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy
Date: Tue, 14 Mar 2023 10:00:41 -0700
Subject: [PATCH 3/5] perf/x86/cstate: Add Granite Rapids support

Granite Rapids Xeon is the successor of Emerald Rapids Xeon, and it will
use the same C-state residency counters as Emerald Rapids (and previous
Xeons, all the way back to Ice Lake Xeon).

Add Granite Rapids Xeon support.

Signed-off-by: Artem Bityutskiy
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20230314170041.2967712-3-kan.liang@linux.intel.com
---
 arch/x86/events/intel/cstate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 551741e79e03..835862c548cc 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -678,6 +678,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&icx_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,	&icx_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,	&icx_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,	&icx_cstates),
 
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&icl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&icl_cstates),

From 15def34e2635ab7e0e96f1bc32e1b69609f14942 Mon Sep 17 00:00:00 2001
From: Yang Jihong
Date: Mon, 27 Feb 2023 10:35:08 +0800
Subject: [PATCH 4/5] perf/core: Fix hardlockup failure caused by perf throttle

Commit e050e3f0a71b ("perf: Fix broken interrupt rate throttling")
changed the throttling threshold check. Before that commit,
hwc->interrupts was compared against max_samples_per_tick first and
incremented afterwards; that commit reversed the order of the two steps
and thereby changed the semantics of max_samples_per_tick. Taking
"max_samples_per_tick" literally, an event should not be throttled
while hwc->interrupts == max_samples_per_tick, so the condition must be
"hwc->interrupts > max_samples_per_tick".

In practice this can break the hardlockup detector: max_samples_per_tick
can be as low as 1, in which case __perf_event_account_interrupt()
returns 1 on the very first interrupt. As a result, the nmi_watchdog
event gets throttled, which stops the PMU (on x86, see
x86_pmu_handle_irq()).
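
To make the off-by-one concrete, here is a minimal user-space sketch
(illustrative only, not kernel code) of both threshold checks with
max_samples_per_tick == 1:

  #include <stdbool.h>
  #include <stdio.h>

  #define MAX_SAMPLES_PER_TICK 1

  /* Mimics the accounting step: bump the count, then decide. */
  static bool throttled(unsigned int *interrupts, bool use_gt)
  {
          (*interrupts)++;
          return use_gt ? *interrupts > MAX_SAMPLES_PER_TICK
                        : *interrupts >= MAX_SAMPLES_PER_TICK;
  }

  int main(void)
  {
          unsigned int a = 0, b = 0;

          /* Old ">=" check: count becomes 1, 1 >= 1, throttled at once. */
          printf("'>=': first interrupt throttled: %d\n", throttled(&a, false));
          /* Fixed ">" check: 1 > 1 is false, one sample per tick allowed. */
          printf("'>':  first interrupt throttled: %d\n", throttled(&b, true));
          return 0;
  }

With ">=", the nmi_watchdog event is throttled on its very first
interrupt; with ">", it gets the one sample per tick that the name
max_samples_per_tick promises.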
Fixes: e050e3f0a71b ("perf: Fix broken interrupt rate throttling")
Signed-off-by: Yang Jihong
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20230227023508.102230-1-yangjihong1@huawei.com
---
 kernel/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index fb3e436bcd4a..82b95b8e0409 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9433,8 +9433,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
 		hwc->interrupts = 1;
 	} else {
 		hwc->interrupts++;
-		if (unlikely(throttle
-			     && hwc->interrupts >= max_samples_per_tick)) {
+		if (unlikely(throttle &&
+			     hwc->interrupts > max_samples_per_tick)) {
 			__this_cpu_inc(perf_throttled_count);
 			tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
 			hwc->interrupts = MAX_INTERRUPTS;

From 743767d6f6b8f28228be181fe369657f7ecd1eb2 Mon Sep 17 00:00:00 2001
From: Stephane Eranian
Date: Wed, 19 Apr 2023 14:42:41 -0700
Subject: [PATCH 5/5] perf/x86/intel/uncore: Add events for Intel SPR IMC PMU

Add the missing clockticks and cas_count_* events for the Intel
Sapphire Rapids IMC PMU. These events are useful for measuring memory
bandwidth.

Signed-off-by: Stephane Eranian
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Kan Liang
Link: https://lore.kernel.org/r/20230419214241.2310385-1-eranian@google.com
---
 arch/x86/events/intel/uncore_snbep.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 7d1199554fe3..fa9b209a11fa 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -6068,6 +6068,17 @@ static struct intel_uncore_ops spr_uncore_mmio_ops = {
 	.read_counter	= uncore_mmio_read_counter,
 };
 
+static struct uncore_event_desc spr_uncore_imc_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks,		"event=0x01,umask=0x00"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_read,		"event=0x05,umask=0xcf"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_read.scale,	"6.103515625e-5"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_read.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_write,	"event=0x05,umask=0xf0"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_write.scale,	"6.103515625e-5"),
+	INTEL_UNCORE_EVENT_DESC(cas_count_write.unit,	"MiB"),
+	{ /* end: all zeroes */ },
+};
+
 static struct intel_uncore_type spr_uncore_imc = {
 	SPR_UNCORE_COMMON_FORMAT(),
 	.name			= "imc",
@@ -6075,6 +6086,7 @@ static struct intel_uncore_type spr_uncore_imc = {
 	.fixed_ctr		= SNR_IMC_MMIO_PMON_FIXED_CTR,
 	.fixed_ctl		= SNR_IMC_MMIO_PMON_FIXED_CTL,
 	.ops			= &spr_uncore_mmio_ops,
+	.event_descs		= spr_uncore_imc_events,
 };
 
 static void spr_uncore_pci_enable_event(struct intel_uncore_box *box,
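
A note on the scale values above: 6.103515625e-5 is exactly 64 / 2^20,
i.e. each CAS transaction moves one 64-byte cache line and the scale
converts a raw count into MiB. A minimal sketch (illustrative only; the
raw counts below are made up) of the bandwidth arithmetic perf performs:

  #include <stdio.h>

  int main(void)
  {
          /* Hypothetical raw counts sampled over a 1-second interval. */
          unsigned long long cas_read = 163840000, cas_write = 81920000;
          double scale = 64.0 / (1 << 20);  /* == 6.103515625e-5 MiB/CAS */

          printf("read : %.1f MiB/s\n", cas_read * scale);   /* 10000.0 */
          printf("write: %.1f MiB/s\n", cas_write * scale);  /*  5000.0 */
          return 0;
  }

In practice perf applies the scale and unit automatically, e.g.
"perf stat -a -e uncore_imc/cas_count_read/,uncore_imc/cas_count_write/ -- sleep 1"
(the exact PMU instance names, such as uncore_imc_0, vary by system).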