tools/perf: Add mem_hops field in perf_mem_data_src structure

Going forward, future generation systems can have more hierarchy
within the node/package level but currently we don't have any data source
encoding field in perf, which can be used to represent this level of data.

Add a new field called 'mem_hops' in the perf_mem_data_src structure
which can be used to represent intra-node/package or inter-node/off-package
details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value
can be used to present different hop levels data.

Also add corresponding macros to define mem_hop field values
and shift value.

Currently we define macro for HOPS_0 which corresponds
to data coming from another core but same node.

Add functionality to represent mem_hop field data in
perf_mem__lvl_scnprintf function with the help of added string
array called mem_hops.

For ex: Encodings for mem_hops fields with L2 cache:

L2                      - local L2
L2 | REMOTE | HOPS_0    - remote core, same node L2

Since with the addition of HOPS field, now remote can be used to
denote cache access from the same node but different core, a check
is added in the c2c_decode_stats function to set mrem only when HOPS
is zero along with set remote field.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211006140654.298352-4-kjain@linux.ibm.com
This commit is contained in:
Kajol Jain 2021-10-06 19:36:53 +05:30 committed by Peter Zijlstra
parent fec9cc6175
commit cae1d75906
2 changed files with 27 additions and 3 deletions

View file

@ -1210,14 +1210,16 @@ union perf_mem_data_src {
mem_remote:1, /* remote */ mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */ mem_snoopx:2, /* snoop mode, ext */
mem_blk:3, /* access blocked */ mem_blk:3, /* access blocked */
mem_rsvd:21; mem_hops:3, /* hop level */
mem_rsvd:18;
}; };
}; };
#elif defined(__BIG_ENDIAN_BITFIELD) #elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src { union perf_mem_data_src {
__u64 val; __u64 val;
struct { struct {
__u64 mem_rsvd:21, __u64 mem_rsvd:18,
mem_hops:3, /* hop level */
mem_blk:3, /* access blocked */ mem_blk:3, /* access blocked */
mem_snoopx:2, /* snoop mode, ext */ mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */ mem_remote:1, /* remote */
@ -1313,6 +1315,11 @@ union perf_mem_data_src {
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
#define PERF_MEM_BLK_SHIFT 40 #define PERF_MEM_BLK_SHIFT 40
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
/* 2-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \ #define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

View file

@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
[PERF_MEM_LVLNUM_NA] = "N/A", [PERF_MEM_LVLNUM_NA] = "N/A",
}; };
static const char * const mem_hops[] = {
"N/A",
/*
* While printing, 'Remote' will be added to represent
* 'Remote core, same node' accesses as remote field need
* to be set with mem_hops field.
*/
"core, same node",
};
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{ {
size_t i, l = 0; size_t i, l = 0;
@ -325,6 +335,9 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
l += 7; l += 7;
} }
if (mem_info && mem_info->data_src.mem_hops)
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
printed = 0; printed = 0;
for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
if (!(m & 0x1)) if (!(m & 0x1))
@ -471,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
/* /*
* Skylake might report unknown remote level via this * Skylake might report unknown remote level via this
* bit, consider it when evaluating remote HITMs. * bit, consider it when evaluating remote HITMs.
*
* Incase of power, remote field can also be used to denote cache
* accesses from the another core of same node. Hence, setting
* mrem only when HOPS is zero along with set remote field.
*/ */
bool mrem = data_src->mem_remote; bool mrem = (data_src->mem_remote && !data_src->mem_hops);
int err = 0; int err = 0;
#define HITM_INC(__f) \ #define HITM_INC(__f) \