perf vendor events intel: Update alderlake events/metrics

Update events to v1.21 including the new event SQ_MISC.BUS_LOCK and
improved comments. Metrics are updated to make TMA info metric names
synchronized. Events and metrics were generated by:
https://github.com/intel/perfmon/blob/main/scripts/create_perf_json.py

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20230517173805.602113-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2023-05-17 10:37:50 -07:00 committed by Arnaldo Carvalho de Melo
parent 68d1241826
commit c04fcf7c8c
5 changed files with 1169 additions and 1210 deletions

File diff suppressed because it is too large Load diff

View file

@ -1017,6 +1017,15 @@
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
"PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of PREFETCHNTA instructions executed.",
"EventCode": "0x40",

View file

@ -93,19 +93,21 @@
"Unit": "cpu_core"
},
{
"BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
"BriefDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding.",
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
"PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
},
{
"BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
"BriefDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding.",
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
"PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"

View file

@ -86,7 +86,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
"MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_alloc_restriction",
"MetricThreshold": "tma_alloc_restriction > 0.1",
@ -94,7 +94,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
"MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots",
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.1",
@ -114,7 +114,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
"MetricExpr": "(tma_info_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_slots",
"MetricExpr": "(tma_info_core_slots - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / tma_info_core_slots",
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@ -124,7 +124,7 @@
},
{
"BriefDescription": "Counts the number of uops that are not from the microsequencer.",
"MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_slots",
"MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_base",
"MetricThreshold": "tma_base > 0.6",
@ -133,7 +133,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
"MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_detect",
"MetricThreshold": "tma_branch_detect > 0.05",
@ -142,7 +142,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_slots",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.05",
@ -151,7 +151,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
"MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_branch_resteer",
"MetricThreshold": "tma_branch_resteer > 0.05",
@ -159,7 +159,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
"MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_cisc",
"MetricThreshold": "tma_cisc > 0.05",
@ -176,7 +176,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
"MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_decode",
"MetricThreshold": "tma_decode > 0.05",
@ -193,7 +193,7 @@
{
"BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1",
@ -201,7 +201,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_slots",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_fast_nuke",
"MetricThreshold": "tma_fast_nuke > 0.05",
@ -209,7 +209,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
"MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_bandwidth",
"MetricThreshold": "tma_fetch_bandwidth > 0.1",
@ -218,7 +218,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend latency restrictions due to icache misses, itlb misses, branch detection, and resteer limitations.",
"MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.15",
@ -235,7 +235,7 @@
},
{
"BriefDescription": "Counts the number of floating point divide operations per uop.",
"MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_slots",
"MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_fpdiv_uops",
"MetricThreshold": "tma_fpdiv_uops > 0.2",
@ -243,7 +243,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
"MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots",
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.2",
@ -252,218 +252,192 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
"MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_icache_misses",
"MetricThreshold": "tma_icache_misses > 0.05",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of total non-speculative loads with an address aliasing block",
"MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_address_alias_blocks"
},
{
"BriefDescription": "Ratio of all branches which mispredict",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": " ",
"MetricName": "tma_info_branch_mispredict_ratio"
},
{
"BriefDescription": "Ratio between Mispredicted branches and unknown branches",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
"MetricGroup": " ",
"MetricName": "tma_info_branch_mispredict_to_unknown_branch_ratio"
},
{
"BriefDescription": "",
"MetricExpr": "CPU_CLK_UNHALTED.CORE",
"MetricGroup": " ",
"MetricName": "tma_info_clks"
"MetricName": "tma_info_core_clks"
},
{
"BriefDescription": "",
"MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
"MetricGroup": " ",
"MetricName": "tma_info_clks_p"
"MetricName": "tma_info_core_clks_p"
},
{
"BriefDescription": "Cycles Per Instruction",
"MetricExpr": "tma_info_clks / INST_RETIRED.ANY",
"MetricGroup": " ",
"MetricName": "tma_info_cpi"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
"MetricGroup": " ",
"MetricName": "tma_info_cpu_utilization"
},
{
"BriefDescription": "Cycle cost per DRAM hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
"MetricGroup": " ",
"MetricName": "tma_info_cycles_per_demand_load_dram_hit"
},
{
"BriefDescription": "Cycle cost per L2 hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
"MetricGroup": " ",
"MetricName": "tma_info_cycles_per_demand_load_l2_hit"
},
{
"BriefDescription": "Cycle cost per LLC hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
"MetricGroup": " ",
"MetricName": "tma_info_cycles_per_demand_load_l3_hit"
},
{
"BriefDescription": "Percentage of all uops which are FPDiv uops",
"MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
"MetricGroup": " ",
"MetricName": "tma_info_fpdiv_uop_ratio"
},
{
"BriefDescription": "Percentage of all uops which are IDiv uops",
"MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
"MetricGroup": " ",
"MetricName": "tma_info_idiv_uop_ratio"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in DRAM",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricGroup": " ",
"MetricName": "tma_info_inst_miss_cost_dramhit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L2",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricGroup": " ",
"MetricName": "tma_info_inst_miss_cost_l2hit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L3",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricGroup": " ",
"MetricName": "tma_info_inst_miss_cost_l3hit_percent"
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricGroup": " ",
"MetricName": "tma_info_ipbranch"
"MetricExpr": "tma_info_core_clks / INST_RETIRED.ANY",
"MetricName": "tma_info_core_cpi"
},
{
"BriefDescription": "Instructions Per Cycle",
"MetricExpr": "INST_RETIRED.ANY / tma_info_clks",
"MetricGroup": " ",
"MetricName": "tma_info_ipc"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
"MetricGroup": " ",
"MetricName": "tma_info_ipcall"
},
{
"BriefDescription": "Instructions per Far Branch",
"MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
"MetricGroup": " ",
"MetricName": "tma_info_ipfarbranch"
},
{
"BriefDescription": "Instructions per Load",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricGroup": " ",
"MetricName": "tma_info_ipload"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
"MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)",
"MetricName": "tma_info_ipmisp_cond_ntaken"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
"MetricName": "tma_info_ipmisp_cond_taken"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
"MetricName": "tma_info_ipmisp_indirect"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
"MetricName": "tma_info_ipmisp_ret"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricGroup": " ",
"MetricName": "tma_info_ipmispredict"
},
{
"BriefDescription": "Instructions per Store",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricGroup": " ",
"MetricName": "tma_info_ipstore"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": " ",
"MetricName": "tma_info_kernel_utilization"
},
{
"BriefDescription": "Percentage of total non-speculative loads that are splits",
"MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_load_splits"
},
{
"BriefDescription": "Load ops retired per 1000 instructions",
"MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
"MetricGroup": " ",
"MetricName": "tma_info_memloadpki"
},
{
"BriefDescription": "Percentage of all uops which are ucode ops",
"MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
"MetricGroup": " ",
"MetricName": "tma_info_microcode_uop_ratio"
"MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks",
"MetricName": "tma_info_core_ipc"
},
{
"BriefDescription": "",
"MetricExpr": "5 * tma_info_clks",
"MetricGroup": " ",
"MetricName": "tma_info_slots"
},
{
"BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
"MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_store_fwd_blocks"
},
{
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
"MetricExpr": "tma_info_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": " ",
"MetricName": "tma_info_turbo_utilization"
"MetricExpr": "5 * tma_info_core_clks",
"MetricName": "tma_info_core_slots"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
"MetricGroup": " ",
"MetricName": "tma_info_upi"
"MetricName": "tma_info_core_upi"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in DRAM",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricName": "tma_info_frontend_inst_miss_cost_dramhit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L2",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricName": "tma_info_frontend_inst_miss_cost_l2hit_percent"
},
{
"BriefDescription": "Percent of instruction miss cost that hit in the L3",
"MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / MEM_BOUND_STALLS.IFETCH",
"MetricName": "tma_info_frontend_inst_miss_cost_l3hit_percent"
},
{
"BriefDescription": "Ratio of all branches which mispredict",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_branch_mispredict_ratio"
},
{
"BriefDescription": "Ratio between Mispredicted branches and unknown branches",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
"MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio"
},
{
"BriefDescription": "Percentage of all uops which are FPDiv uops",
"MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
"MetricName": "tma_info_inst_mix_fpdiv_uop_ratio"
},
{
"BriefDescription": "Percentage of all uops which are IDiv uops",
"MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
"MetricName": "tma_info_inst_mix_idiv_uop_ratio"
},
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipbranch"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
"MetricName": "tma_info_inst_mix_ipcall"
},
{
"BriefDescription": "Instructions per Far Branch",
"MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
"MetricName": "tma_info_inst_mix_ipfarbranch"
},
{
"BriefDescription": "Instructions per Load",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_inst_mix_ipload"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken",
"MetricExpr": "INST_RETIRED.ANY / (BR_MISP_RETIRED.COND - BR_MISP_RETIRED.COND_TAKEN)",
"MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken"
},
{
"BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN",
"MetricName": "tma_info_inst_mix_ipmisp_cond_taken"
},
{
"BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT",
"MetricName": "tma_info_inst_mix_ipmisp_indirect"
},
{
"BriefDescription": "Instructions per retired return Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN",
"MetricName": "tma_info_inst_mix_ipmisp_ret"
},
{
"BriefDescription": "Instructions per retired Branch Misprediction",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
"MetricName": "tma_info_inst_mix_ipmispredict"
},
{
"BriefDescription": "Instructions per Store",
"MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
"MetricName": "tma_info_inst_mix_ipstore"
},
{
"BriefDescription": "Percentage of all uops which are ucode ops",
"MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
"MetricName": "tma_info_inst_mix_microcode_uop_ratio"
},
{
"BriefDescription": "Percentage of all uops which are x87 uops",
"MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
"MetricGroup": " ",
"MetricName": "tma_info_x87_uop_ratio"
"MetricName": "tma_info_inst_mix_x87_uop_ratio"
},
{
"BriefDescription": "Percentage of total non-speculative loads with an address aliasing block",
"MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_l1_bound_address_alias_blocks"
},
{
"BriefDescription": "Percentage of total non-speculative loads that are splits",
"MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_l1_bound_load_splits"
},
{
"BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
"MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
"MetricName": "tma_info_l1_bound_store_fwd_blocks"
},
{
"BriefDescription": "Cycle cost per DRAM hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit"
},
{
"BriefDescription": "Cycle cost per L2 hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit"
},
{
"BriefDescription": "Cycle cost per LLC hit",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
"MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit"
},
{
"BriefDescription": "Load ops retired per 1000 instructions",
"MetricExpr": "1e3 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
"MetricName": "tma_info_memory_memloadpki"
},
{
"BriefDescription": "Average CPU Utilization",
"MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
"MetricName": "tma_info_system_cpu_utilization"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
"MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
{
"BriefDescription": "Average Frequency Utilization relative to nominal frequency",
"MetricExpr": "tma_info_core_clks / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Power",
"MetricName": "tma_info_system_turbo_utilization"
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
"MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group",
"MetricName": "tma_itlb_misses",
"MetricThreshold": "tma_itlb_misses > 0.05",
@ -471,7 +445,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
"MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_clks",
"MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l1_bound",
"MetricThreshold": "tma_l1_bound > 0.1",
@ -480,7 +454,7 @@
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l2_bound",
"MetricThreshold": "tma_l2_bound > 0.1",
@ -488,7 +462,7 @@
},
{
"BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
"MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
"MetricThreshold": "tma_l3_bound > 0.1",
@ -504,7 +478,7 @@
},
{
"BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_slots",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group",
"MetricName": "tma_machine_clears",
"MetricThreshold": "tma_machine_clears > 0.05",
@ -513,7 +487,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
"MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_mem_scheduler",
"MetricThreshold": "tma_mem_scheduler > 0.1",
@ -521,7 +495,7 @@
},
{
"BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.",
"MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_clks + tma_store_bound)",
"MetricExpr": "min(tma_backend_bound, LD_HEAD.ANY_AT_RET / tma_info_core_clks + tma_store_bound)",
"MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2",
@ -538,7 +512,7 @@
},
{
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
"MetricExpr": "UOPS_RETIRED.MS / tma_info_slots",
"MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots",
"MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_ms_uops",
"MetricThreshold": "tma_ms_uops > 0.05",
@ -548,7 +522,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
"MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_non_mem_scheduler",
"MetricThreshold": "tma_non_mem_scheduler > 0.1",
@ -556,7 +530,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_slots",
"MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_nuke",
"MetricThreshold": "tma_nuke > 0.05",
@ -564,7 +538,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
"MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_other_fb",
"MetricThreshold": "tma_other_fb > 0.05",
@ -572,7 +546,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
"MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_clks",
"MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_other_l1",
"MetricThreshold": "tma_other_l1 > 0.05",
@ -588,7 +562,7 @@
},
{
"BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
"MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_slots",
"MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_base_group",
"MetricName": "tma_other_ret",
"MetricThreshold": "tma_other_ret > 0.3",
@ -604,7 +578,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
"MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_slots",
"MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_predecode",
"MetricThreshold": "tma_predecode > 0.05",
@ -612,7 +586,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
"MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_register",
"MetricThreshold": "tma_register > 0.1",
@ -620,7 +594,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
"MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_reorder_buffer",
"MetricThreshold": "tma_reorder_buffer > 0.1",
@ -638,7 +612,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that result in retirement slots.",
"MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_slots",
"MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots",
"MetricGroup": "TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.75",
@ -655,7 +629,7 @@
},
{
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
"MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_slots",
"MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots",
"MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group",
"MetricName": "tma_serialization",
"MetricThreshold": "tma_serialization > 0.1",
@ -679,7 +653,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
"MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_clks",
"MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_hit",
"MetricThreshold": "tma_stlb_hit > 0.05",
@ -687,7 +661,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
"MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_clks",
"MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_stlb_miss",
"MetricThreshold": "tma_stlb_miss > 0.05",
@ -703,7 +677,7 @@
},
{
"BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_clks",
"MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
"MetricThreshold": "tma_store_fwd_blk > 0.05",

View file

@ -1,6 +1,6 @@
Family-model,Version,Filename,EventType
GenuineIntel-6-(97|9A|B7|BA|BF),v1.20,alderlake,core
GenuineIntel-6-BE,v1.20,alderlaken,core
GenuineIntel-6-(97|9A|B7|BA|BF),v1.21,alderlake,core
GenuineIntel-6-BE,v1.21,alderlaken,core
GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
GenuineIntel-6-(3D|47),v27,broadwell,core
GenuineIntel-6-56,v9,broadwellde,core

1 Family-model Version Filename EventType
2 GenuineIntel-6-(97|9A|B7|BA|BF) v1.20 v1.21 alderlake core
3 GenuineIntel-6-BE v1.20 v1.21 alderlaken core
4 GenuineIntel-6-(1C|26|27|35|36) v4 bonnell core
5 GenuineIntel-6-(3D|47) v27 broadwell core
6 GenuineIntel-6-56 v9 broadwellde core