perf tools changes for v6.0: 2nd batch

- 'perf c2c' now supports ARM64, adjust its output to cope with differences with
   what is in x86_64. Now go find false sharing on ARM64 (at least Neoverse) as well!
 
 - Refactor the JSON processing, making the output more compact and thus reducing the
   size of the resulting perf binary.
 
 - Improvements for 'perf offcpu' profiling, including tracking child processes.
 
 - Update Intel JSON metrics and events files for broadwellde, broadwellx,
   cascadelakex, haswellx, icelakex, ivytown, jaketown, knightslanding,
   sapphirerapids, skylakex and snowridgex.
 
 - Add 'perf stat' JSON output and a 'perf test' entry for it.
 
 - Ignore memfd and anonymous mmap events if jitdump present.
 
 - Refactor 'perf test' shell tests allowing subdirs.
 
 - Fix an error handling path in 'parse_perf_probe_command()'
 
 - Fixes for the guest Intel PT tracing patchkit in the 1st batch of this merge window.
 
 - Print debuginfod queries if -v option is used, to explain delays in processing when
   debuginfo servers are enabled to fetch DSOs with richer symbol tables.
 
 - Improve error message for 'perf record -p not_existing_pid'
 
 - Fix openssl and libbpf feature detection.
 
 - Add PMU pai_crypto event description for IBM z16 on 'perf list'.
 
 - Fix typos and duplicated words on comments in various places.
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYvg1aQAKCRCyPKLppCJ+
 J12hAQCCBXh+t7ab2PyXqDDwr/1aFc94nXA2A5wQ9N4MzD66YgEA1O34rxBiyetU
 c1pStjrOqR6HGlXh+jAI4zTnK6w1wAs=
 =m+oE
 -----END PGP SIGNATURE-----

Merge tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tool updates from Arnaldo Carvalho de Melo:

 - 'perf c2c' now supports ARM64, adjust its output to cope with
   differences with what is in x86_64. Now go find false sharing on
   ARM64 (at least Neoverse) as well!

 - Refactor the JSON processing, making the output more compact and thus
   reducing the size of the resulting perf binary

 - Improvements for 'perf offcpu' profiling, including tracking child
   processes

 - Update Intel JSON metrics and events files for broadwellde,
   broadwellx, cascadelakex, haswellx, icelakex, ivytown, jaketown,
   knightslanding, sapphirerapids, skylakex and snowridgex

 - Add 'perf stat' JSON output and a 'perf test' entry for it

 - Ignore memfd and anonymous mmap events if jitdump present

 - Refactor 'perf test' shell tests allowing subdirs

 - Fix an error handling path in 'parse_perf_probe_command()'

 - Fixes for the guest Intel PT tracing patchkit in the 1st batch of
   this merge window

 - Print debuginfod queries if -v option is used, to explain delays in
   processing when debuginfo servers are enabled to fetch DSOs with
   richer symbol tables

 - Improve error message for 'perf record -p not_existing_pid'

 - Fix openssl and libbpf feature detection

 - Add PMU pai_crypto event description for IBM z16 on 'perf list'

 - Fix typos and duplicated words on comments in various places

* tag 'perf-tools-fixes-for-v6.0-2022-08-13' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (81 commits)
  perf test: Refactor shell tests allowing subdirs
  perf vendor events: Update events for snowridgex
  perf vendor events: Update events and metrics for skylakex
  perf vendor events: Update metrics for sapphirerapids
  perf vendor events: Update events for knightslanding
  perf vendor events: Update metrics for jaketown
  perf vendor events: Update metrics for ivytown
  perf vendor events: Update events and metrics for icelakex
  perf vendor events: Update events and metrics for haswellx
  perf vendor events: Update events and metrics for cascadelakex
  perf vendor events: Update events and metrics for broadwellx
  perf vendor events: Update metrics for broadwellde
  perf jevents: Fold strings optimization
  perf jevents: Compress the pmu_events_table
  perf metrics: Copy entire pmu_event in find metric
  perf pmu-events: Hide the pmu_events
  perf pmu-events: Don't assume pmu_event is an array
  perf pmu-events: Move test events/metrics to JSON
  perf test: Use full metric resolution
  perf pmu-events: Hide pmu_events_map
  ...
This commit is contained in:
Linus Torvalds 2022-08-14 09:22:11 -07:00
commit 96f86ff083
112 changed files with 95927 additions and 7120 deletions

View File

@ -93,9 +93,11 @@ INSTALL ?= install
RM ?= rm -f
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled libcap \
FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \
disassembler-four-args disassembler-init-styled libcap \
clang-bpf-co-re
FEATURE_DISPLAY = libbfd libcap clang-bpf-co-re
FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \
libcap clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall

View File

@ -90,6 +90,8 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@ -97,7 +99,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
$(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty
$(OUTPUT)test-hello.bin:
$(BUILD)
@ -241,16 +243,18 @@ $(OUTPUT)test-libpython.bin:
$(BUILD) $(FLAGS_PYTHON_EMBED)
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD)
$(OUTPUT)test-libbfd-buildid.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
$(BUILD_BFD) || $(BUILD_BFD) -liberty || $(BUILD_BFD) -liberty -lz
$(OUTPUT)test-disassembler-four-args.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz
$(OUTPUT)test-disassembler-init-styled.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(BUILD_BFD) -lopcodes || $(BUILD_BFD) -lopcodes -liberty || \
$(BUILD_BFD) -lopcodes -liberty -lz
$(OUTPUT)test-reallocarray.bin:
$(BUILD)

View File

@ -1,22 +1,23 @@
// SPDX-License-Identifier: GPL-2.0
#include <openssl/evp.h>
#include <openssl/sha.h>
#include <openssl/md5.h>
/*
* The MD5_* API have been deprecated since OpenSSL 3.0, which causes the
* feature test to fail silently. This is a workaround.
*/
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
int main(void)
{
MD5_CTX context;
EVP_MD_CTX *mdctx;
unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
unsigned char dat[] = "12345";
unsigned int digest_len;
MD5_Init(&context);
MD5_Update(&context, &dat[0], sizeof(dat));
MD5_Final(&md[0], &context);
mdctx = EVP_MD_CTX_new();
if (!mdctx)
return 0;
EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
EVP_DigestUpdate(mdctx, &dat[0], sizeof(dat));
EVP_DigestFinal_ex(mdctx, &md[0], &digest_len);
EVP_MD_CTX_free(mdctx);
SHA1(&dat[0], sizeof(dat), &md[0]);

View File

@ -1310,7 +1310,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */

View File

@ -0,0 +1,16 @@
include::guestmount.txt[]
--guestkallsyms=<path>::
Guest OS /proc/kallsyms file copy. perf reads it to get guest
kernel symbols. Users copy it out from guest OS.
--guestmodules=<path>::
Guest OS /proc/modules file copy. perf reads it to get guest
kernel module information. Users copy it out from guest OS.
--guestvmlinux=<path>::
Guest OS kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.

View File

@ -0,0 +1,11 @@
--guestmount=<path>::
Guest OS root file system mount directory. Users mount guest OS
root directories under <path> by a specific filesystem access method,
typically, sshfs.
For example, start 2 guest OS, one's pid is 8888 and the other's is 9999:
[verse]
$ mkdir \~/guestmount
$ cd \~/guestmount
$ sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
$ sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
$ perf {GMEXAMPLECMD} --guestmount=~/guestmount {GMEXAMPLESUBCMD}

View File

@ -109,7 +109,9 @@ REPORT OPTIONS
-d::
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
Switch to HITM type (rmt, lcl) or peer snooping type (peer) to display
and sort on. Total HITMs (tot) as default, except Arm64 uses peer mode
as default.
--stitch-lbr::
Show callgraph with stitched LBRs, which may have more complete
@ -174,12 +176,18 @@ For each cacheline in the 1) list we display following data:
Cacheline
- cacheline address (hex number)
Rmt/Lcl Hitm
Rmt/Lcl Hitm (Display with HITM types)
- cacheline percentage of all Remote/Local HITM accesses
LLC Load Hitm - Total, LclHitm, RmtHitm
Peer Snoop (Display with peer type)
- cacheline percentage of all peer accesses
LLC Load Hitm - Total, LclHitm, RmtHitm (For display with HITM types)
- count of Total/Local/Remote load HITMs
Load Peer - Total, Local, Remote (For display with peer type)
- count of Total/Local/Remote load from peer cache or DRAM
Total records
- sum of all cachelines accesses
@ -201,16 +209,21 @@ For each cacheline in the 1) list we display following data:
- count of LLC load accesses, includes LLC hits and LLC HITMs
RMT Load Hit - RmtHit, RmtHitm
- count of remote load accesses, includes remote hits and remote HITMs
- count of remote load accesses, includes remote hits and remote HITMs;
on Arm Neoverse cores, RmtHit accounts for all remote accesses,
including remote DRAM or any upward cache level in the remote node
Load Dram - Lcl, Rmt
- count of local and remote DRAM accesses
For each offset in the 2) list we display following data:
HITM - Rmt, Lcl
HITM - Rmt, Lcl (Display with HITM types)
- % of Remote/Local HITM accesses for given offset within cacheline
Peer Snoop - Rmt, Lcl (Display with peer type)
- % of Remote/Local peer accesses for given offset within cacheline
Store Refs - L1 Hit, L1 Miss, N/A
- % of store accesses that hit L1, missed L1 and N/A (not available) memory
level for given offset within cacheline
@ -227,9 +240,12 @@ For each offset in the 2) list we display following data:
Code address
- code address responsible for the accesses
cycles - rmt hitm, lcl hitm, load
cycles - rmt hitm, lcl hitm, load (Display with HITM types)
- sum of cycles for given accesses - Remote/Local HITM and generic load
cycles - rmt peer, lcl peer, load (Display with peer type)
- sum of cycles for given accesses - Remote/Local peer load and generic load
cpu cnt
- number of cpus that participated in the access
@ -251,7 +267,8 @@ The 'Node' field displays nodes that accesses given cacheline
offset. Its output comes in 3 flavors:
- node IDs separated by ','
- node IDs with stats for each ID, in following format:
Node{cpus %hitms %stores}
Node{cpus %hitms %stores} (Display with HITM types)
Node{cpus %peers %stores} (Display with peer type)
- node IDs with list of affected CPUs in following format:
Node{cpu list}

View File

@ -102,6 +102,10 @@ include::itrace.txt[]
should be used, and also --buildid-all and --switch-events may be
useful.
:GMEXAMPLECMD: inject
:GMEXAMPLESUBCMD:
include::guestmount.txt[]
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],

View File

@ -77,26 +77,11 @@ OPTIONS
Collect host side performance profile.
--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
root directories under <path> by a specific filesystem access method,
typically, sshfs. For example, start 2 guest os. The one's pid is 8888
and the other's is 9999.
#mkdir ~/guestmount; cd ~/guestmount
#sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
#sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
#perf kvm --host --guest --guestmount=~/guestmount top
--guestkallsyms=<path>::
Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
kernel symbols. Users copy it out from guest os.
--guestmodules=<path>::
Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
:GMEXAMPLECMD: kvm --host --guest
:GMEXAMPLESUBCMD: top
include::guest-files.txt[]
-v::
--verbose::
Be more verbose (show counter open errors, etc).

View File

@ -228,7 +228,7 @@ OPTIONS
Instruction Trace decoding.
The machine_pid and vcpu fields are derived from data resulting from using
perf insert to insert a perf.data file recorded inside a virtual machine into
perf inject to insert a perf.data file recorded inside a virtual machine into
a perf.data file recorded on the host at the same time.
Finally, a user may not set fields to none for all event types.
@ -507,9 +507,9 @@ include::itrace.txt[]
The known limitations include exception handling such as
setjmp/longjmp will have calls/returns not match.
--guest-code::
Indicate that guest code can be found in the hypervisor process,
which is a common case for KVM test programs.
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
SEE ALSO
--------

View File

@ -570,6 +570,27 @@ Additional metrics may be printed with all earlier fields being empty.
include::intel-hybrid.txt[]
JSON FORMAT
-----------
With -j, perf stat is able to print out a JSON format output
that can be used for parsing.
- timestamp : optional usec time stamp in fractions of second (with -I)
- optional aggregate options:
- core : core identifier (with --per-core)
- die : die identifier (with --per-die)
- socket : socket identifier (with --per-socket)
- node : node identifier (with --per-node)
- thread : thread identifier (with --per-thread)
- counter-value : counter value
- unit : unit of the counter value or empty
- event : event name
- variance : optional variance if multiple values are collected (with -r)
- runtime : run time of counter
- metric-value : optional metric value
- metric-unit : optional unit of metric
SEE ALSO
--------
linkperf:perf-top[1], linkperf:perf-list[1]

View File

@ -297,9 +297,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
@ -329,8 +326,8 @@ ifneq ($(TCMALLOC),)
endif
ifeq ($(FEATURES_DUMP),)
# We will display at the end of this Makefile.config, using $(call feature_display_entries)
# As we may retry some feature detection here, see the disassembler-four-args case, for instance
# We will display at the end of this Makefile.config, using $(call feature_display_entries),
# as we may retry some feature detection here.
FEATURE_DISPLAY_DEFERRED := 1
include $(srctree)/tools/build/Makefile.feature
else
@ -924,13 +921,9 @@ ifndef NO_LIBBFD
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
@ -1356,7 +1349,7 @@ endif
# re-generate FEATURE-DUMP as we may have called feature_check, found out
# extra libraries to add to LDFLAGS of some other test and then redo those
# tests, see the block about libbfd, disassembler-four-args, for instance.
# tests.
$(shell rm -f $(FEATURE_DUMP_FILENAME))
$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))

View File

@ -1005,7 +1005,8 @@ install-tests: all install-gtk
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
$(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
$(INSTALL) tests/shell/lib/*.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
install-bin: install-tools install-tests install-traceevent-plugins

View File

@ -438,7 +438,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (opts->full_auxtrace) {
struct evsel *tracking_evsel;
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
goto out;

View File

@ -257,7 +257,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
return err;

View File

@ -3,7 +3,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
const struct pmu_events_map *pmu_events_map__find(void)
const struct pmu_events_table *pmu_events_table__find(void)
{
struct perf_pmu *pmu = NULL;
@ -18,7 +18,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);
return perf_pmu__find_table(pmu);
}
return NULL;

View File

@ -56,7 +56,7 @@ int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, in
return TEST_FAIL;
}
ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
ret = parse_event(evlist, "intel_cqm/llc_occupancy/");
if (ret) {
pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
err = TEST_SKIP;

View File

@ -233,7 +233,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
struct evsel *tracking_evsel;
int err;
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err)
return err;

View File

@ -426,7 +426,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
if (!evlist__can_select_event(evlist, sched_switch))
return -EPERM;
err = parse_events(evlist, sched_switch, NULL);
err = parse_event(evlist, sched_switch);
if (err) {
pr_debug2("%s: failed to parse %s, error %d\n",
__func__, sched_switch, err);

View File

@ -316,7 +316,7 @@ static int iostat_event_group(struct evlist *evl,
sprintf(iostat_cmd, iostat_cmd_template,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx,
list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx);
ret = parse_events(evl, iostat_cmd, NULL);
ret = parse_event(evl, iostat_cmd);
if (ret)
goto err;
}

View File

@ -122,5 +122,5 @@ int topdown_parse_events(struct evlist *evlist)
topdown_events = TOPDOWN_L1_EVENTS;
}
return parse_events(evlist, topdown_events, NULL);
return parse_event(evlist, topdown_events);
}

View File

@ -55,6 +55,8 @@ struct c2c_hists {
struct compute_stats {
struct stats lcl_hitm;
struct stats rmt_hitm;
struct stats lcl_peer;
struct stats rmt_peer;
struct stats load;
};
@ -113,16 +115,18 @@ struct perf_c2c {
};
enum {
DISPLAY_LCL,
DISPLAY_RMT,
DISPLAY_TOT,
DISPLAY_LCL_HITM,
DISPLAY_RMT_HITM,
DISPLAY_TOT_HITM,
DISPLAY_SNP_PEER,
DISPLAY_MAX,
};
static const char *display_str[DISPLAY_MAX] = {
[DISPLAY_LCL] = "Local",
[DISPLAY_RMT] = "Remote",
[DISPLAY_TOT] = "Total",
[DISPLAY_LCL_HITM] = "Local HITMs",
[DISPLAY_RMT_HITM] = "Remote HITMs",
[DISPLAY_TOT_HITM] = "Total HITMs",
[DISPLAY_SNP_PEER] = "Peer Snoop",
};
static const struct option c2c_options[] = {
@ -154,6 +158,8 @@ static void *c2c_he_zalloc(size_t size)
init_stats(&c2c_he->cstats.lcl_hitm);
init_stats(&c2c_he->cstats.rmt_hitm);
init_stats(&c2c_he->cstats.lcl_peer);
init_stats(&c2c_he->cstats.rmt_peer);
init_stats(&c2c_he->cstats.load);
return &c2c_he->he;
@ -253,6 +259,10 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->rmt_hitm, weight);
else if (stats->lcl_hitm)
update_stats(&cstats->lcl_hitm, weight);
else if (stats->rmt_peer)
update_stats(&cstats->rmt_peer, weight);
else if (stats->lcl_peer)
update_stats(&cstats->lcl_peer, weight);
else if (stats->load)
update_stats(&cstats->load, weight);
}
@ -650,6 +660,9 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \
STAT_FN(rmt_hitm)
STAT_FN(lcl_hitm)
STAT_FN(rmt_peer)
STAT_FN(lcl_peer)
STAT_FN(tot_peer)
STAT_FN(store)
STAT_FN(st_l1hit)
STAT_FN(st_l1miss)
@ -787,7 +800,7 @@ percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
}
static double percent_hitm(struct c2c_hist_entry *c2c_he)
static double percent_costly_snoop(struct c2c_hist_entry *c2c_he)
{
struct c2c_hists *hists;
struct c2c_stats *stats;
@ -800,17 +813,22 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
total = &hists->stats;
switch (c2c.display) {
case DISPLAY_RMT:
case DISPLAY_RMT_HITM:
st = stats->rmt_hitm;
tot = total->rmt_hitm;
break;
case DISPLAY_LCL:
case DISPLAY_LCL_HITM:
st = stats->lcl_hitm;
tot = total->lcl_hitm;
break;
case DISPLAY_TOT:
case DISPLAY_TOT_HITM:
st = stats->tot_hitm;
tot = total->tot_hitm;
break;
case DISPLAY_SNP_PEER:
st = stats->tot_peer;
tot = total->tot_peer;
break;
default:
break;
}
@ -827,8 +845,8 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he)
})
static int
percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
percent_costly_snoop_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
struct c2c_hist_entry *c2c_he;
int width = c2c_width(fmt, hpp, he->hists);
@ -836,20 +854,20 @@ percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
double per;
c2c_he = container_of(he, struct c2c_hist_entry, he);
per = percent_hitm(c2c_he);
per = percent_costly_snoop(c2c_he);
return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
}
static int
percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
percent_costly_snoop_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
return percent_color(fmt, hpp, he, percent_hitm);
return percent_color(fmt, hpp, he, percent_costly_snoop);
}
static int64_t
percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
percent_costly_snoop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right)
{
struct c2c_hist_entry *c2c_left;
struct c2c_hist_entry *c2c_right;
@ -859,8 +877,8 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
c2c_left = container_of(left, struct c2c_hist_entry, he);
c2c_right = container_of(right, struct c2c_hist_entry, he);
per_left = percent_hitm(c2c_left);
per_right = percent_hitm(c2c_right);
per_left = percent_costly_snoop(c2c_left);
per_right = percent_costly_snoop(c2c_right);
return per_left - per_right;
}
@ -899,6 +917,8 @@ static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \
PERCENT_FN(rmt_hitm)
PERCENT_FN(lcl_hitm)
PERCENT_FN(rmt_peer)
PERCENT_FN(lcl_peer)
PERCENT_FN(st_l1hit)
PERCENT_FN(st_l1miss)
PERCENT_FN(st_na)
@ -965,6 +985,68 @@ percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
return per_left - per_right;
}
static int
percent_lcl_peer_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
int width = c2c_width(fmt, hpp, he->hists);
double per = PERCENT(he, lcl_peer);
char buf[10];
return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
}
/*
 * Colored variant of the local-peer percentage column: hands the
 * percent_lcl_peer() getter to the shared percent_color() helper.
 */
static int
percent_lcl_peer_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
		       struct hist_entry *he)
{
	double (*get_percent)(struct c2c_hist_entry *) = percent_lcl_peer;

	return percent_color(fmt, hpp, he, get_percent);
}
/*
 * Compare two hist entries by their local-peer percentage.
 *
 * Returns a negative, zero or positive value when @left is respectively
 * smaller than, equal to or greater than @right.
 *
 * The percentages are doubles while the return type is int64_t, so
 * returning their difference would truncate toward zero and make any
 * two entries less than 1.0 apart compare as equal.  Use an explicit
 * three-way comparison instead.
 */
static int64_t
percent_lcl_peer_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
		     struct hist_entry *left, struct hist_entry *right)
{
	double per_left = PERCENT(left, lcl_peer);
	double per_right = PERCENT(right, lcl_peer);

	return (per_left > per_right) - (per_left < per_right);
}
static int
percent_rmt_peer_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
{
int width = c2c_width(fmt, hpp, he->hists);
double per = PERCENT(he, rmt_peer);
char buf[10];
return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
}
/*
 * Colored variant of the remote-peer percentage column: hands the
 * percent_rmt_peer() getter to the shared percent_color() helper.
 */
static int
percent_rmt_peer_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
		       struct hist_entry *he)
{
	double (*get_percent)(struct c2c_hist_entry *) = percent_rmt_peer;

	return percent_color(fmt, hpp, he, get_percent);
}
/*
 * Compare two hist entries by their remote-peer percentage.
 *
 * Returns a negative, zero or positive value when @left is respectively
 * smaller than, equal to or greater than @right.
 *
 * The percentages are doubles while the return type is int64_t, so
 * returning their difference would truncate toward zero and make any
 * two entries less than 1.0 apart compare as equal.  Use an explicit
 * three-way comparison instead.
 */
static int64_t
percent_rmt_peer_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
		     struct hist_entry *left, struct hist_entry *right)
{
	double per_left = PERCENT(left, rmt_peer);
	double per_right = PERCENT(right, rmt_peer);

	return (per_left > per_right) - (per_left < per_right);
}
static int
percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he)
@ -1142,18 +1224,22 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
advance_hpp(hpp, ret);
switch (c2c.display) {
case DISPLAY_RMT:
case DISPLAY_RMT_HITM:
ret = display_metrics(hpp, stats->rmt_hitm,
c2c_he->stats.rmt_hitm);
break;
case DISPLAY_LCL:
case DISPLAY_LCL_HITM:
ret = display_metrics(hpp, stats->lcl_hitm,
c2c_he->stats.lcl_hitm);
break;
case DISPLAY_TOT:
case DISPLAY_TOT_HITM:
ret = display_metrics(hpp, stats->tot_hitm,
c2c_he->stats.tot_hitm);
break;
case DISPLAY_SNP_PEER:
ret = display_metrics(hpp, stats->tot_peer,
c2c_he->stats.tot_peer);
break;
default:
break;
}
@ -1213,6 +1299,8 @@ __func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
MEAN_ENTRY(mean_load_entry, load);
MEAN_ENTRY(mean_rmt_peer_entry, rmt_peer);
MEAN_ENTRY(mean_lcl_peer_entry, lcl_peer);
static int
cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@ -1360,6 +1448,30 @@ static struct c2c_dimension dim_rmt_hitm = {
.width = 7,
};
/*
 * "tot_peer" column: total count of loads served from a peer, shown as
 * "Total" and opening the "Load Peer" header group.
 */
static struct c2c_dimension dim_tot_peer = {
	.name		= "tot_peer",
	.header		= HEADER_SPAN("------- Load Peer -------", "Total", 2),
	.width		= 7,
	.entry		= tot_peer_entry,
	.cmp		= tot_peer_cmp,
};
/* "lcl_peer" column: count of local peer loads, shown as "Local". */
static struct c2c_dimension dim_lcl_peer = {
	.name		= "lcl_peer",
	.header		= HEADER_SPAN_LOW("Local"),
	.width		= 7,
	.entry		= lcl_peer_entry,
	.cmp		= lcl_peer_cmp,
};
/* "rmt_peer" column: count of remote peer loads, shown as "Remote". */
static struct c2c_dimension dim_rmt_peer = {
	.name		= "rmt_peer",
	.header		= HEADER_SPAN_LOW("Remote"),
	.width		= 7,
	.entry		= rmt_peer_entry,
	.cmp		= rmt_peer_cmp,
};
static struct c2c_dimension dim_cl_rmt_hitm = {
.header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
.name = "cl_rmt_hitm",
@ -1376,6 +1488,22 @@ static struct c2c_dimension dim_cl_lcl_hitm = {
.width = 7,
};
/*
 * "cl_rmt_peer" column: remote peer count reusing the rmt_peer entry and
 * comparator, shown as "Rmt" and opening the "Peer" header group.
 */
static struct c2c_dimension dim_cl_rmt_peer = {
	.name		= "cl_rmt_peer",
	.header		= HEADER_SPAN("----- Peer -----", "Rmt", 1),
	.width		= 7,
	.entry		= rmt_peer_entry,
	.cmp		= rmt_peer_cmp,
};
/*
 * "cl_lcl_peer" column: local peer count reusing the lcl_peer entry and
 * comparator, shown as "Lcl".
 */
static struct c2c_dimension dim_cl_lcl_peer = {
	.name		= "cl_lcl_peer",
	.header		= HEADER_SPAN_LOW("Lcl"),
	.width		= 7,
	.entry		= lcl_peer_entry,
	.cmp		= lcl_peer_cmp,
};
static struct c2c_dimension dim_tot_stores = {
.header = HEADER_BOTH("Total", "Stores"),
.name = "tot_stores",
@ -1488,17 +1616,18 @@ static struct c2c_dimension dim_tot_loads = {
.width = 7,
};
static struct c2c_header percent_hitm_header[] = {
[DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
[DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
[DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
static struct c2c_header percent_costly_snoop_header[] = {
[DISPLAY_LCL_HITM] = HEADER_BOTH("Lcl", "Hitm"),
[DISPLAY_RMT_HITM] = HEADER_BOTH("Rmt", "Hitm"),
[DISPLAY_TOT_HITM] = HEADER_BOTH("Tot", "Hitm"),
[DISPLAY_SNP_PEER] = HEADER_BOTH("Peer", "Snoop"),
};
static struct c2c_dimension dim_percent_hitm = {
.name = "percent_hitm",
.cmp = percent_hitm_cmp,
.entry = percent_hitm_entry,
.color = percent_hitm_color,
static struct c2c_dimension dim_percent_costly_snoop = {
.name = "percent_costly_snoop",
.cmp = percent_costly_snoop_cmp,
.entry = percent_costly_snoop_entry,
.color = percent_costly_snoop_color,
.width = 7,
};
@ -1520,6 +1649,24 @@ static struct c2c_dimension dim_percent_lcl_hitm = {
.width = 7,
};
/*
 * "percent_rmt_peer" column: percentage of remote peer accesses, shown as
 * "Rmt" and opening the "Peer Snoop" header group.  Has a colored
 * renderer in addition to the plain entry callback.
 */
static struct c2c_dimension dim_percent_rmt_peer = {
	.name		= "percent_rmt_peer",
	.header		= HEADER_SPAN("-- Peer Snoop --", "Rmt", 1),
	.width		= 7,
	.entry		= percent_rmt_peer_entry,
	.color		= percent_rmt_peer_color,
	.cmp		= percent_rmt_peer_cmp,
};
/*
 * "percent_lcl_peer" column: percentage of local peer accesses, shown as
 * "Lcl".  Has a colored renderer in addition to the plain entry callback.
 */
static struct c2c_dimension dim_percent_lcl_peer = {
	.name		= "percent_lcl_peer",
	.header		= HEADER_SPAN_LOW("Lcl"),
	.width		= 7,
	.entry		= percent_lcl_peer_entry,
	.color		= percent_lcl_peer_color,
	.cmp		= percent_lcl_peer_cmp,
};
static struct c2c_dimension dim_percent_stores_l1hit = {
.header = HEADER_SPAN("------- Store Refs ------", "L1 Hit", 2),
.name = "percent_stores_l1hit",
@ -1588,12 +1735,6 @@ static struct c2c_dimension dim_dso = {
.se = &sort_dso,
};
static struct c2c_header header_node[3] = {
HEADER_LOW("Node"),
HEADER_LOW("Node{cpus %hitms %stores}"),
HEADER_LOW("Node{cpu list}"),
};
static struct c2c_dimension dim_node = {
.name = "node",
.cmp = empty_cmp,
@ -1625,6 +1766,22 @@ static struct c2c_dimension dim_mean_load = {
.width = 8,
};
/*
 * "mean_rmt_peer" column: cycles figure for remote peer loads, rendered by
 * mean_rmt_peer_entry() as "rmt peer" and opening the "cycles" header
 * group.  Uses empty_cmp as its comparator.
 */
static struct c2c_dimension dim_mean_rmt_peer = {
	.name		= "mean_rmt_peer",
	.header		= HEADER_SPAN("---------- cycles ----------", "rmt peer", 2),
	.width		= 8,
	.entry		= mean_rmt_peer_entry,
	.cmp		= empty_cmp,
};
/*
 * "mean_lcl_peer" column: cycles figure for local peer loads, rendered by
 * mean_lcl_peer_entry() as "lcl peer".  Uses empty_cmp as its comparator.
 */
static struct c2c_dimension dim_mean_lcl_peer = {
	.name		= "mean_lcl_peer",
	.header		= HEADER_SPAN_LOW("lcl peer"),
	.width		= 8,
	.entry		= mean_lcl_peer_entry,
	.cmp		= empty_cmp,
};
static struct c2c_dimension dim_cpucnt = {
.header = HEADER_BOTH("cpu", "cnt"),
.name = "cpucnt",
@ -1672,8 +1829,13 @@ static struct c2c_dimension *dimensions[] = {
&dim_tot_hitm,
&dim_lcl_hitm,
&dim_rmt_hitm,
&dim_tot_peer,
&dim_lcl_peer,
&dim_rmt_peer,
&dim_cl_lcl_hitm,
&dim_cl_rmt_hitm,
&dim_cl_lcl_peer,
&dim_cl_rmt_peer,
&dim_tot_stores,
&dim_stores_l1hit,
&dim_stores_l1miss,
@ -1688,9 +1850,11 @@ static struct c2c_dimension *dimensions[] = {
&dim_ld_rmthit,
&dim_tot_recs,
&dim_tot_loads,
&dim_percent_hitm,
&dim_percent_costly_snoop,
&dim_percent_rmt_hitm,
&dim_percent_lcl_hitm,
&dim_percent_rmt_peer,
&dim_percent_lcl_peer,
&dim_percent_stores_l1hit,
&dim_percent_stores_l1miss,
&dim_percent_stores_na,
@ -1703,6 +1867,8 @@ static struct c2c_dimension *dimensions[] = {
&dim_node,
&dim_mean_rmt,
&dim_mean_lcl,
&dim_mean_rmt_peer,
&dim_mean_lcl_peer,
&dim_mean_load,
&dim_cpucnt,
&dim_srcline,
@ -1941,18 +2107,22 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
c2c_he = container_of(he, struct c2c_hist_entry, he);
switch (c2c.display) {
case DISPLAY_LCL:
case DISPLAY_LCL_HITM:
he->filtered = filter_display(c2c_he->stats.lcl_hitm,
stats->lcl_hitm);
break;
case DISPLAY_RMT:
case DISPLAY_RMT_HITM:
he->filtered = filter_display(c2c_he->stats.rmt_hitm,
stats->rmt_hitm);
break;
case DISPLAY_TOT:
case DISPLAY_TOT_HITM:
he->filtered = filter_display(c2c_he->stats.tot_hitm,
stats->tot_hitm);
break;
case DISPLAY_SNP_PEER:
he->filtered = filter_display(c2c_he->stats.tot_peer,
stats->tot_peer);
break;
default:
break;
}
@ -1972,15 +2142,17 @@ static inline bool is_valid_hist_entry(struct hist_entry *he)
return true;
switch (c2c.display) {
case DISPLAY_LCL:
case DISPLAY_LCL_HITM:
has_record = !!c2c_he->stats.lcl_hitm;
break;
case DISPLAY_RMT:
case DISPLAY_RMT_HITM:
has_record = !!c2c_he->stats.rmt_hitm;
break;
case DISPLAY_TOT:
case DISPLAY_TOT_HITM:
has_record = !!c2c_he->stats.tot_hitm;
break;
case DISPLAY_SNP_PEER:
has_record = !!c2c_he->stats.tot_peer;
default:
break;
}
@ -2069,9 +2241,33 @@ static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
return 0;
}
static struct c2c_header header_node_0 = HEADER_LOW("Node");
static struct c2c_header header_node_1_hitms_stores =
HEADER_LOW("Node{cpus %hitms %stores}");
static struct c2c_header header_node_1_peers_stores =
HEADER_LOW("Node{cpus %peers %stores}");
static struct c2c_header header_node_2 = HEADER_LOW("Node{cpu list}");
/*
 * Pick the header of the "node" dimension (dim_node) from c2c.node_info:
 *   0 - header_node_0
 *   1 - header_node_1_peers_stores when the display type is DISPLAY_SNP_PEER,
 *       header_node_1_hitms_stores otherwise
 *   2 - header_node_2
 * Any other value leaves dim_node.header untouched.
 *
 * The header is chosen only through the switch; there is no unchecked array
 * lookup indexed by c2c.node_info, so an out-of-range value cannot read past
 * a table.
 */
static void setup_nodes_header(void)
{
	switch (c2c.node_info) {
	case 0:
		dim_node.header = header_node_0;
		break;
	case 1:
		if (c2c.display == DISPLAY_SNP_PEER)
			dim_node.header = header_node_1_peers_stores;
		else
			dim_node.header = header_node_1_hitms_stores;
		break;
	case 2:
		dim_node.header = header_node_2;
		break;
	default:
		break;
	}
}
static int setup_nodes(struct perf_session *session)
@ -2136,13 +2332,14 @@ static int setup_nodes(struct perf_session *session)
}
#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
#define HAS_PEER(__h) ((__h)->stats.lcl_peer || (__h)->stats.rmt_peer)
static int resort_shared_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
{
struct c2c_hist_entry *c2c_he;
c2c_he = container_of(he, struct c2c_hist_entry, he);
if (HAS_HITMS(c2c_he)) {
if (HAS_HITMS(c2c_he) || HAS_PEER(c2c_he)) {
c2c.shared_clines++;
c2c_add_stats(&c2c.shared_clines_stats, &c2c_he->stats);
}
@ -2202,6 +2399,8 @@ static void print_c2c__display_stats(FILE *out)
fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
fprintf(out, " Load access blocked by data : %10d\n", stats->blk_data);
fprintf(out, " Load access blocked by address : %10d\n", stats->blk_addr);
fprintf(out, " Load HIT Local Peer : %10d\n", stats->lcl_peer);
fprintf(out, " Load HIT Remote Peer : %10d\n", stats->rmt_peer);
fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
@ -2230,6 +2429,7 @@ static void print_shared_cacheline_info(FILE *out)
fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit);
fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
fprintf(out, " Load hits on peer cache or nodes : %10d\n", stats->lcl_peer + stats->rmt_peer);
fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
fprintf(out, " Blocked Access on shared lines : %10d\n", stats->blk_data + stats->blk_addr);
fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
@ -2272,13 +2472,22 @@ static void print_pareto(FILE *out)
int ret;
const char *cl_output;
cl_output = "cl_num,"
"cl_rmt_hitm,"
"cl_lcl_hitm,"
"cl_stores_l1hit,"
"cl_stores_l1miss,"
"cl_stores_na,"
"dcacheline";
if (c2c.display != DISPLAY_SNP_PEER)
cl_output = "cl_num,"
"cl_rmt_hitm,"
"cl_lcl_hitm,"
"cl_stores_l1hit,"
"cl_stores_l1miss,"
"cl_stores_na,"
"dcacheline";
else
cl_output = "cl_num,"
"cl_rmt_peer,"
"cl_lcl_peer,"
"cl_stores_l1hit,"
"cl_stores_l1miss,"
"cl_stores_na,"
"dcacheline";
perf_hpp_list__init(&hpp_list);
ret = hpp_list__parse(&hpp_list, cl_output, NULL);
@ -2314,7 +2523,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel));
first = false;
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
fprintf(out, " Cachelines sort on : %s\n",
display_str[c2c.display]);
fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
}
@ -2471,7 +2680,7 @@ static int perf_c2c_browser__title(struct hist_browser *browser,
{
scnprintf(bf, size,
"Shared Data Cache Line Table "
"(%lu entries, sorted on %s HITMs)",
"(%lu entries, sorted on %s)",
browser->nr_non_filtered_entries,
display_str[c2c.display]);
return 0;
@ -2585,7 +2794,7 @@ static int ui_quirks(void)
nodestr = "CL";
}
dim_percent_hitm.header = percent_hitm_header[c2c.display];
dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display];
/* Fix the zero line for dcacheline column. */
buf = fill_line("Cacheline", dim_dcacheline.width +
@ -2669,14 +2878,16 @@ static int setup_callchain(struct evlist *evlist)
static int setup_display(const char *str)
{
const char *display = str ?: "tot";
const char *display = str;
if (!strcmp(display, "tot"))
c2c.display = DISPLAY_TOT;
c2c.display = DISPLAY_TOT_HITM;
else if (!strcmp(display, "rmt"))
c2c.display = DISPLAY_RMT;
c2c.display = DISPLAY_RMT_HITM;
else if (!strcmp(display, "lcl"))
c2c.display = DISPLAY_LCL;
c2c.display = DISPLAY_LCL_HITM;
else if (!strcmp(display, "peer"))
c2c.display = DISPLAY_SNP_PEER;
else {
pr_err("failed: unknown display type: %s\n", str);
return -1;
@ -2723,10 +2934,12 @@ static int build_cl_output(char *cl_sort, bool no_source)
}
if (asprintf(&c2c.cl_output,
"%s%s%s%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s%s%s",
c2c.use_stdio ? "cl_num_empty," : "",
"percent_rmt_hitm,"
"percent_lcl_hitm,"
c2c.display == DISPLAY_SNP_PEER ? "percent_rmt_peer,"
"percent_lcl_peer," :
"percent_rmt_hitm,"
"percent_lcl_hitm,",
"percent_stores_l1hit,"
"percent_stores_l1miss,"
"percent_stores_na,"
@ -2734,8 +2947,10 @@ static int build_cl_output(char *cl_sort, bool no_source)
add_pid ? "pid," : "",
add_tid ? "tid," : "",
add_iaddr ? "iaddr," : "",
"mean_rmt,"
"mean_lcl,"
c2c.display == DISPLAY_SNP_PEER ? "mean_rmt_peer,"
"mean_lcl_peer," :
"mean_rmt,"
"mean_lcl,",
"mean_load,"
"tot_recs,"
"cpucnt,",
@ -2756,6 +2971,7 @@ err:
static int setup_coalesce(const char *coalesce, bool no_source)
{
const char *c = coalesce ?: coalesce_default;
const char *sort_str = NULL;
if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
return -ENOMEM;
@ -2763,12 +2979,16 @@ static int setup_coalesce(const char *coalesce, bool no_source)
if (build_cl_output(c2c.cl_sort, no_source))
return -1;
if (asprintf(&c2c.cl_resort, "offset,%s",
c2c.display == DISPLAY_TOT ?
"tot_hitm" :
c2c.display == DISPLAY_RMT ?
"rmt_hitm,lcl_hitm" :
"lcl_hitm,rmt_hitm") < 0)
if (c2c.display == DISPLAY_TOT_HITM)
sort_str = "tot_hitm";
else if (c2c.display == DISPLAY_RMT_HITM)
sort_str = "rmt_hitm,lcl_hitm";
else if (c2c.display == DISPLAY_LCL_HITM)
sort_str = "lcl_hitm,rmt_hitm";
else if (c2c.display == DISPLAY_SNP_PEER)
sort_str = "tot_peer";
if (asprintf(&c2c.cl_resort, "offset,%s", sort_str) < 0)
return -ENOMEM;
pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
@ -2814,7 +3034,7 @@ static int perf_c2c__report(int argc, const char **argv)
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
callchain_help, &parse_callchain_opt,
callchain_default_opt),
OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
OPT_STRING('d', "display", &display, "Switch HITM output type", "tot,lcl,rmt,peer"),
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
@ -2848,22 +3068,6 @@ static int perf_c2c__report(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
err = setup_display(display);
if (err)
goto out;
err = setup_coalesce(coalesce, no_source);
if (err) {
pr_debug("Failed to initialize hists\n");
goto out;
}
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
goto out;
}
session = perf_session__new(&data, &c2c.tool);
if (IS_ERR(session)) {
err = PTR_ERR(session);
@ -2871,12 +3075,40 @@ static int perf_c2c__report(int argc, const char **argv)
goto out;
}
/*
* Use the 'tot' as default display type if user doesn't specify it;
* since Arm64 platform doesn't support HITMs flag, use 'peer' as the
* default display type.
*/
if (!display) {
if (!strcmp(perf_env__arch(&session->header.env), "arm64"))
display = "peer";
else
display = "tot";
}
err = setup_display(display);
if (err)
goto out_session;
err = setup_coalesce(coalesce, no_source);
if (err) {
pr_debug("Failed to initialize hists\n");
goto out_session;
}
err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
if (err) {
pr_debug("Failed to initialize hists\n");
goto out_session;
}
session->itrace_synth_opts = &itrace_synth_opts;
err = setup_nodes(session);
if (err) {
pr_err("Failed setup nodes\n");
goto out;
goto out_session;
}
err = mem2node__init(&c2c.mem2node, &session->header.env);
@ -2909,27 +3141,45 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
output_str = "cl_idx,"
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
"percent_hitm,"
"tot_hitm,lcl_hitm,rmt_hitm,"
"tot_recs,"
"tot_loads,"
"tot_stores,"
"stores_l1hit,stores_l1miss,stores_na,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm,"
"dram_lcl,dram_rmt";
if (c2c.display != DISPLAY_SNP_PEER)
output_str = "cl_idx,"
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
"percent_costly_snoop,"
"tot_hitm,lcl_hitm,rmt_hitm,"
"tot_recs,"
"tot_loads,"
"tot_stores,"
"stores_l1hit,stores_l1miss,stores_na,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm,"
"dram_lcl,dram_rmt";
else
output_str = "cl_idx,"
"dcacheline,"
"dcacheline_node,"
"dcacheline_count,"
"percent_costly_snoop,"
"tot_peer,lcl_peer,rmt_peer,"
"tot_recs,"
"tot_loads,"
"tot_stores,"
"stores_l1hit,stores_l1miss,stores_na,"
"ld_fbhit,ld_l1hit,ld_l2hit,"
"ld_lclhit,lcl_hitm,"
"ld_rmthit,rmt_hitm,"
"dram_lcl,dram_rmt";
if (c2c.display == DISPLAY_TOT)
if (c2c.display == DISPLAY_TOT_HITM)
sort_str = "tot_hitm";
else if (c2c.display == DISPLAY_RMT)
else if (c2c.display == DISPLAY_RMT_HITM)
sort_str = "rmt_hitm";
else if (c2c.display == DISPLAY_LCL)
else if (c2c.display == DISPLAY_LCL_HITM)
sort_str = "lcl_hitm";
else if (c2c.display == DISPLAY_SNP_PEER)
sort_str = "tot_peer";
c2c_hists__reinit(&c2c.hists, output_str, sort_str);

View File

@ -1638,14 +1638,14 @@ int cmd_kvm(int argc, const char **argv)
return __cmd_record(file_name, argc, argv);
else if (strlen(argv[0]) > 2 && strstarts("report", argv[0]))
return __cmd_report(file_name, argc, argv);
else if (!strncmp(argv[0], "diff", 4))
else if (strlen(argv[0]) > 2 && strstarts("diff", argv[0]))
return cmd_diff(argc, argv);
else if (!strncmp(argv[0], "top", 3))
else if (!strcmp(argv[0], "top"))
return cmd_top(argc, argv);
else if (!strncmp(argv[0], "buildid-list", 12))
else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0]))
return __cmd_buildid_list(file_name, argc, argv);
#ifdef HAVE_KVM_STAT_SUPPORT
else if (!strncmp(argv[0], "stat", 4))
else if (strlen(argv[0]) > 2 && strstarts("stat", argv[0]))
return kvm_cmd_stat(file_name, argc, argv);
#endif
else

View File

@ -3996,8 +3996,15 @@ int cmd_record(int argc, const char **argv)
arch__add_leaf_frame_record_opts(&rec->opts);
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
if (rec->opts.target.pid != NULL) {
pr_err("Couldn't create thread/CPU maps: %s\n",
errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
goto out;
}
else
usage_with_options(record_usage, record_options);
}
err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
if (err)

View File

@ -3563,7 +3563,7 @@ int cmd_sched(int argc, const char **argv)
if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
return __cmd_record(argc, argv);
} else if (!strncmp(argv[0], "lat", 3)) {
} else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
sched.tp_handler = &lat_ops;
if (argc > 1) {
argc = parse_options(argc, argv, latency_options, latency_usage, 0);

View File

@ -3861,7 +3861,7 @@ int cmd_script(int argc, const char **argv)
OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
"Run xed disassembler on output", parse_xed),
OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls from from itrace", parse_call_trace),
"Decode calls from itrace", parse_call_trace),
OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
"Decode calls and returns from itrace", parse_callret_trace),
OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",

View File

@ -71,7 +71,6 @@
#include "util/bpf_counter.h"
#include "util/iostat.h"
#include "util/pmu-hybrid.h"
#include "util/topdown.h"
#include "asm/bug.h"
#include <linux/time64.h>
@ -1250,6 +1249,8 @@ static struct option stat_options[] = {
"Merge identical named hybrid events"),
OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
"print counts with custom separator"),
OPT_BOOLEAN('j', "json-output", &stat_config.json_output,
"print counts in JSON format"),
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
"monitor event in cgroup name only", parse_stat_cgroups),
OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
@ -1436,6 +1437,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
default:
return NULL;
}
@ -1460,6 +1462,7 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
default:
return NULL;
}
@ -1610,6 +1613,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
default:
return NULL;
}
@ -1630,6 +1634,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
default:
return NULL;
}

View File

@ -2749,7 +2749,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
/*
* Suppress this argument if its value is zero and
* and we don't have a string associated in an
* we don't have a string associated in an
* strarray for it.
*/
if (val == 0 &&

View File

@ -7,6 +7,10 @@ JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \
find $(JDIR_TEST) -name '*.json')
JEVENTS_PY = pmu-events/jevents.py
ifeq ($(JEVENTS_ARCH),)
JEVENTS_ARCH=$(SRCARCH)
endif
#
# Locate/process JSON files in pmu-events/arch/
# directory and create tables in pmu-events.c.
@ -19,5 +23,5 @@ $(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c
else
$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY)
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(SRCARCH) pmu-events/arch $@
$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@
endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
[
{
"MetricExpr": "1 / IPC",
"MetricName": "CPI"
},
{
"MetricExpr": "inst_retired.any / cpu_clk_unhalted.thread",
"MetricName": "IPC",
"MetricGroup": "group1"
},
{
"MetricExpr": "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * ( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
"MetricName": "Frontend_Bound_SMT"
},
{
"MetricExpr": "l1d\\-loads\\-misses / inst_retired.any",
"MetricName": "dcache_miss_cpi"
},
{
"MetricExpr": "l1i\\-loads\\-misses / inst_retired.any",
"MetricName": "icache_miss_cycles"
},
{
"MetricExpr": "(dcache_miss_cpi + icache_miss_cycles)",
"MetricName": "cache_miss_cycles",
"MetricGroup": "group1"
},
{
"MetricExpr": "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
"MetricName": "DCache_L2_All_Hits"
},
{
"MetricExpr": "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
"MetricName": "DCache_L2_All_Miss"
},
{
"MetricExpr": "dcache_l2_all_hits + dcache_l2_all_miss",
"MetricName": "DCache_L2_All"
},
{
"MetricExpr": "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
"MetricName": "DCache_L2_Hits"
},
{
"MetricExpr": "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
"MetricName": "DCache_L2_Misses"
},
{
"MetricExpr": "ipc + M2",
"MetricName": "M1"
},
{
"MetricExpr": "ipc + M1",
"MetricName": "M2"
},
{
"MetricExpr": "1/M3",
"MetricName": "M3"
},
{
"MetricExpr": "64 * l1d.replacement / 1000000000 / duration_time",
"MetricName": "L1D_Cache_Fill_BW"
}
]

View File

@ -450,6 +450,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

View File

@ -37,7 +37,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.ANY",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Filters for any transaction originating from the IPQ or IRQ. This does not include lookups originating from the ISMQ.",
"UMask": "0x11",
@ -48,7 +47,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.DATA_READ",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Read transactions",
"UMask": "0x3",
@ -59,7 +57,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.NID",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x41",
@ -70,7 +67,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.READ",
"Filter": "CBoFilter0[22:18]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Read transactions",
"UMask": "0x21",
@ -81,7 +77,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Filters for only snoop requests coming from the remote socket(s) through the IPQ.",
"UMask": "0x9",
@ -92,7 +87,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.WRITE",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set umask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.; Writeback transactions from L2 to the LLC This includes all write transactions -- both Cachable and UC.",
"UMask": "0x5",
@ -153,7 +147,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x37",
"EventName": "UNC_C_LLC_VICTIMS.NID",
"Filter": "CBoFilter1[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x40",
@ -794,7 +787,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "UNC_C_RxR_IPQ_RETRY2.TARGET",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Number of times a snoop (probe) request had to retry. Filters exist to cover some of the common cases retries.; Counts the number of times that a request from the IPQ was retried filtered by the Target NodeID as specified in the Cbox's Filter register.",
"UMask": "0x40",
@ -845,7 +837,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x32",
"EventName": "UNC_C_RxR_IRQ_RETRY.NID",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Qualify one of the other subevents by a given RTID destination NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER1.nid.",
"UMask": "0x40",
@ -896,7 +887,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x29",
"EventName": "UNC_C_RxR_IRQ_RETRY2.TARGET",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times that a request from the IPQ was retried filtered by the Target NodeID as specified in the Cbox's Filter register.",
"UMask": "0x40",
@ -937,7 +927,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x33",
"EventName": "UNC_C_RxR_ISMQ_RETRY.NID",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Number of times a transaction flowing through the ISMQ had to retry. Transaction pass through the ISMQ as responses for requests that already exist in the Cbo. Some examples include: when data is returned or when snoop responses come back from the cores.; Qualify one of the other subevents by a given RTID destination NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER1.nid.",
"UMask": "0x40",
@ -968,7 +957,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x33",
"EventName": "UNC_C_RxR_ISMQ_RETRY.WB_CREDITS",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Number of times a transaction flowing through the ISMQ had to retry. Transaction pass through the ISMQ as responses for requests that already exist in the Cbo. Some examples include: when data is returned or when snoop responses come back from the cores.; Qualify one of the other subevents by a given RTID destination NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER1.nid.",
"UMask": "0x80",
@ -999,7 +987,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x2A",
"EventName": "UNC_C_RxR_ISMQ_RETRY2.TARGET",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times that a request from the ISMQ was retried filtered by the Target NodeID as specified in the Cbox's Filter register.",
"UMask": "0x40",
@ -1114,7 +1101,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All transactions, satisfied by an opcode, inserted into the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x21",
@ -1135,7 +1121,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions, satisfied by an opcode, inserted into the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x23",
@ -1146,7 +1131,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions inserted into the TOR that match an opcode.",
"UMask": "0x3",
@ -1167,7 +1151,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions, satisfied by an opcode, inserted into the TOR that are satisfied by remote caches or remote memory.",
"UMask": "0x83",
@ -1178,7 +1161,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All NID matched (matches an RTID destination) transactions inserted into the TOR. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x48",
@ -1189,7 +1171,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_EVICTION",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; NID matched eviction transactions inserted into the TOR.",
"UMask": "0x44",
@ -1200,7 +1181,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All NID matched miss requests that were inserted into the TOR.",
"UMask": "0x4A",
@ -1211,7 +1191,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions inserted into the TOR that match a NID and an opcode.",
"UMask": "0x43",
@ -1222,7 +1201,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Transactions inserted into the TOR that match a NID and an opcode.",
"UMask": "0x41",
@ -1233,7 +1211,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_WB",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; NID matched write transactions inserted into the TOR.",
"UMask": "0x50",
@ -1244,7 +1221,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfuly inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Transactions inserted into the TOR that match an opcode (matched by Cn_MSR_PMON_BOX_FILTER.opc)",
"UMask": "0x1",
@ -1265,7 +1241,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfuly inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All transactions, satisifed by an opcode, inserted into the TOR that are satisifed by remote caches or remote memory.",
"UMask": "0x81",
@ -1312,7 +1287,6 @@
"BriefDescription": "TOR Occupancy; Local Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding transactions, satisifed by an opcode, in the TOR that are satisifed by locally HOMed memory.",
"UMask": "0x21",
@ -1340,7 +1314,6 @@
"BriefDescription": "TOR Occupancy; Misses to Local Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss transactions, satisifed by an opcode, in the TOR that are satisifed by locally HOMed memory.",
"UMask": "0x23",
@ -1350,7 +1323,6 @@
"BriefDescription": "TOR Occupancy; Miss Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries for miss transactions that match an opcode. This generally means that the request was sent to memory or MMIO.",
"UMask": "0x3",
@ -1369,7 +1341,6 @@
"BriefDescription": "TOR Occupancy; Misses to Remote Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss transactions, satisifed by an opcode, in the TOR that are satisifed by remote caches or remote memory.",
"UMask": "0x83",
@ -1379,7 +1350,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of NID matched outstanding requests in the TOR. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid.In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x48",
@ -1389,7 +1359,6 @@
"BriefDescription": "TOR Occupancy; NID Matched Evictions",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_EVICTION",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding NID matched eviction transactions in the TOR .",
"UMask": "0x44",
@ -1399,7 +1368,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss requests in the TOR that match a NID.",
"UMask": "0x4A",
@ -1409,7 +1377,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched Miss",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss requests in the TOR that match a NID and an opcode.",
"UMask": "0x43",
@ -1419,7 +1386,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries that match a NID and an opcode.",
"UMask": "0x41",
@ -1429,7 +1395,6 @@
"BriefDescription": "TOR Occupancy; NID Matched Writebacks",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_WB",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); NID matched write transactions int the TOR.",
"UMask": "0x50",
@ -1439,7 +1404,6 @@
"BriefDescription": "TOR Occupancy; Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries that match an opcode (matched by Cn_MSR_PMON_BOX_FILTER.opc).",
"UMask": "0x1",
@ -1458,7 +1422,6 @@
"BriefDescription": "TOR Occupancy; Remote Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding transactions, satisifed by an opcode, in the TOR that are satisifed by remote caches or remote memory.",
"UMask": "0x81",
@ -1610,66 +1573,6 @@
"UMask": "0x8",
"Unit": "CBO"
},
{
"BriefDescription": "QPI Address/Opcode Match; AD Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.AD",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x4",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Address",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.ADDR",
"Filter": "HA_AddrMatch0[31:6], HA_AddrMatch1[13:0]",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; AK Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.AK",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x10",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; BL Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.BL",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x8",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Address & Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.FILT",
"Filter": "HA_AddrMatch0[31:6], HA_AddrMatch1[13:0], HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x3",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Opcode",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.OPC",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "HA"
},
{
"BriefDescription": "BT Cycles Not Empty",
"Counter": "0,1,2,3",

View File

@ -416,17 +416,6 @@
"UMask": "0x10",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count; Select Source",
"Counter": "0,1",
"EventCode": "0x16",
"EventName": "UNC_I_TRANSACTIONS.ORDERINGQ",
"Filter": "IRPFilter[4:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Tracks only those requests that come from the port specified in the IRP_PmonFilter.OrderingQ register. This register allows one to select one specific queue. It is not possible to monitor multiple queues at a time. If this bit is not set, then requests from all sources will be counted.",
"UMask": "0x40",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count; Other",
"Counter": "0,1",
@ -1117,7 +1106,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x1",
@ -1138,7 +1126,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.U2C_ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x4",

View File

@ -444,6 +444,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

View File

@ -845,6 +845,15 @@
"UMask": "0x2",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -855,16 +864,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe writes (partial cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x180,filter_tid=0x3e",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -876,17 +875,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "L2 demand and L2 prefetch code references to LLC",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x181",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -898,17 +886,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x18c",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -920,17 +897,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (partial cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x18d",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -942,17 +908,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe read current",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x19e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -964,17 +919,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write references (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x1c8,filter_tid=0x3e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; Evictions",
"Counter": "0,1,2,3",
@ -1035,17 +979,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x187",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1057,17 +990,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x187,filter_nc=1",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1079,17 +1001,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO writes",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x18f,filter_nc=1",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1101,17 +1012,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for RFO",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x190",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1123,17 +1023,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for code reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x191",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1145,17 +1034,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for data reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x192",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1167,17 +1045,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses for PCIe read current",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x19e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1189,17 +1056,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x1c8",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1211,17 +1067,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write misses (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x1c8,filter_tid=0x3e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; NID and Opcode Matched",
"Counter": "0,1,2,3",

View File

@ -715,6 +715,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cha_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,201 @@
[
{
"BriefDescription": "pclk Cycles",
"Counter": "0,1,2,3",
"EventName": "UNC_P_CLOCKTICKS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_CORE_TRANSITION_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "UNC_P_CORE_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_DEMOTIONS",
"Counter": "0,1,2,3",
"EventCode": "0x30",
"EventName": "UNC_P_DEMOTIONS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 0 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "UNC_P_FIVR_PS_PS0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 1 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UNC_P_FIVR_PS_PS1_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 2 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x77",
"EventName": "UNC_P_FIVR_PS_PS2_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 3 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x78",
"EventName": "UNC_P_FIVR_PS_PS3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Thermal Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Power Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x5",
"EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "IO P Limit Strongest Lower Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x73",
"EventName": "UNC_P_FREQ_MIN_IO_P_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Cycles spent changing Frequency",
"Counter": "0,1,2,3",
"EventCode": "0x74",
"EventName": "UNC_P_FREQ_TRANS_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_MCP_PROCHOT_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x6",
"EventName": "UNC_P_MCP_PROCHOT_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Memory Phase Shedding Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x2F",
"EventName": "UNC_P_MEMORY_PHASE_SHEDDING_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C0",
"Counter": "0,1,2,3",
"EventCode": "0x2A",
"EventName": "UNC_P_PKG_RESIDENCY_C0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C2E",
"Counter": "0,1,2,3",
"EventCode": "0x2B",
"EventName": "UNC_P_PKG_RESIDENCY_C2E_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C3",
"Counter": "0,1,2,3",
"EventCode": "0x2C",
"EventName": "UNC_P_PKG_RESIDENCY_C3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C6",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_P_PKG_RESIDENCY_C6_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_PMAX_THROTTLED_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x7",
"EventName": "UNC_P_PMAX_THROTTLED_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C0 and C1",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C3",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C6 and C7",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "External Prochot",
"Counter": "0,1,2,3",
"EventCode": "0xA",
"EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Internal Prochot",
"Counter": "0,1,2,3",
"EventCode": "0x9",
"EventName": "UNC_P_PROCHOT_INTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Total Core C State Transition Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x72",
"EventName": "UNC_P_TOTAL_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "VR Hot",
"Counter": "0,1,2,3",
"EventCode": "0x42",
"EventName": "UNC_P_VR_HOT_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
}
]

View File

@ -74,12 +74,6 @@
"MetricGroup": "Branches;Fed;FetchBW",
"MetricName": "UpTB"
},
{
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
"MetricGroup": "Pipeline;Mem",
"MetricName": "CPI"
},
{
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD",
@ -327,6 +321,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
@ -374,5 +374,404 @@
"MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency"
},
{
"BriefDescription": "CPU operating frequency (in GHz)",
"MetricExpr": "( CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ ) / 1000000000",
"MetricGroup": "",
"MetricName": "cpu_operating_frequency",
"ScaleUnit": "1GHz"
},
{
"BriefDescription": "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles.",
"MetricExpr": " CPU_CLK_UNHALTED.THREAD / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "cpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "The ratio of number of completed memory load instructions to the total number of completed instructions",
"MetricExpr": " MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "loads_per_instr",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "The ratio of number of completed memory store instructions to the total number of completed instructions",
"MetricExpr": " MEM_UOPS_RETIRED.ALL_STORES / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "stores_per_instr",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of requests missing L1 data cache (includes data+rfo w/ prefetches) to the total number of completed instructions",
"MetricExpr": " L1D.REPLACEMENT / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l1d_mpi_includes_data_plus_rfo_with_prefetches",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of demand load requests hitting in L1 data cache to the total number of completed instructions",
"MetricExpr": " MEM_LOAD_UOPS_RETIRED.L1_HIT / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l1d_demand_data_read_hits_per_instr",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of code read requests missing in L1 instruction cache (includes prefetches) to the total number of completed instructions",
"MetricExpr": " L2_RQSTS.ALL_CODE_RD / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l1_i_code_read_misses_with_prefetches_per_instr",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed demand load requests hitting in L2 cache to the total number of completed instructions",
"MetricExpr": " MEM_LOAD_UOPS_RETIRED.L2_HIT / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l2_demand_data_read_hits_per_instr",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of requests missing L2 cache (includes code+data+rfo w/ prefetches) to the total number of completed instructions",
"MetricExpr": " L2_LINES_IN.ALL / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l2_mpi_includes_code_plus_data_plus_rfo_with_prefetches",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed data read requests missing L2 cache to the total number of completed instructions",
"MetricExpr": " MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l2_demand_data_read_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of code read requests missing L2 cache to the total number of completed instructions",
"MetricExpr": " L2_RQSTS.CODE_RD_MISS / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "l2_demand_code_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB.",
"MetricExpr": " ITLB_MISSES.WALK_COMPLETED / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "itlb_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions. This implies it missed in the Instruction Translation Lookaside Buffer (ITLB) and further levels of TLB.",
"MetricExpr": " ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "itlb_large_page_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data loads to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
"MetricExpr": " DTLB_LOAD_MISSES.WALK_COMPLETED / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "dtlb_load_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of completed page walks (for all page sizes) caused by demand data stores to the total number of completed instructions. This implies it missed in the DTLB and further levels of TLB.",
"MetricExpr": " DTLB_STORE_MISSES.WALK_COMPLETED / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "dtlb_store_mpi",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Intel(R) Quick Path Interconnect (QPI) data transmit bandwidth (MB/sec)",
"MetricExpr": "( UNC_Q_TxL_FLITS_G0.DATA * 8 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "qpi_data_transmit_bw_only_data",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "DDR memory read bandwidth (MB/sec)",
"MetricExpr": "( UNC_M_CAS_COUNT.RD * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "memory_bandwidth_read",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "DDR memory write bandwidth (MB/sec)",
"MetricExpr": "( UNC_M_CAS_COUNT.WR * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "memory_bandwidth_write",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "DDR memory bandwidth (MB/sec)",
"MetricExpr": "(( UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR ) * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "memory_bandwidth_total",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU.",
"MetricExpr": "( cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x19e@ * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "io_bandwidth_read",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU.",
"MetricExpr": "( cbox@UNC_C_TOR_INSERTS.OPCODE\\,filter_opc\\=0x1c8\\,filter_tid\\=0x3e@ * 64 / 1000000) / duration_time",
"MetricGroup": "",
"MetricName": "io_bandwidth_write",
"ScaleUnit": "1MB/s"
},
{
"BriefDescription": "Uops delivered from decoded instruction cache (decoded stream buffer or DSB) as a percent of total uops delivered to Instruction Decode Queue",
"MetricExpr": "100 * ( IDQ.DSB_UOPS / UOPS_ISSUED.ANY )",
"MetricGroup": "",
"MetricName": "percent_uops_delivered_from_decoded_icache_dsb",
"ScaleUnit": "1%"
},
{
"BriefDescription": "Uops delivered from legacy decode pipeline (Micro-instruction Translation Engine or MITE) as a percent of total uops delivered to Instruction Decode Queue",
"MetricExpr": "100 * ( IDQ.MITE_UOPS / UOPS_ISSUED.ANY )",
"MetricGroup": "",
"MetricName": "percent_uops_delivered_from_legacy_decode_pipeline_mite",
"ScaleUnit": "1%"
},
{
"BriefDescription": "Uops delivered from microcode sequencer (MS) as a percent of total uops delivered to Instruction Decode Queue",
"MetricExpr": "100 * ( IDQ.MS_UOPS / UOPS_ISSUED.ANY )",
"MetricGroup": "",
"MetricName": "percent_uops_delivered_from_microcode_sequencer_ms",
"ScaleUnit": "1%"
},
{
"BriefDescription": "Uops delivered from loop stream detector (LSD) as a percent of total uops delivered to Instruction Decode Queue",
"MetricExpr": "100 * ( UOPS_ISSUED.ANY - IDQ.MITE_UOPS - IDQ.MS_UOPS - IDQ.DSB_UOPS ) / UOPS_ISSUED.ANY ",
"MetricGroup": "",
"MetricName": "percent_uops_delivered_from_loop_stream_detector_lsd",
"ScaleUnit": "1%"
},
{
"BriefDescription": "Ratio of number of data read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
"MetricExpr": "( cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x192@ ) / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "llc_data_read_mpi_demand_plus_prefetch",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Ratio of number of code read requests missing last level core cache (includes demand w/ prefetches) to the total number of completed instructions",
"MetricExpr": "( cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x181@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x191@ ) / INST_RETIRED.ANY ",
"MetricGroup": "",
"MetricName": "llc_code_read_mpi_demand_plus_prefetch",
"ScaleUnit": "1per_instr"
},
{
"BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to local DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
"MetricExpr": "100 * cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ / ( cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ )",
"MetricGroup": "",
"MetricName": "numa_percent_reads_addressed_to_local_dram",
"ScaleUnit": "1%"
},
{
"BriefDescription": "Memory reads that miss the last level cache (LLC) addressed to remote DRAM as a percentage of total memory read accesses, does not include LLC prefetches.",
"MetricExpr": "100 * cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ / ( cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ + cbox@UNC_C_TOR_INSERTS.MISS_OPCODE\\,filter_opc\\=0x182@ )",
"MetricGroup": "",
"MetricName": "numa_percent_reads_addressed_to_remote_dram",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
"MetricExpr": "100 * ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )",
"MetricGroup": "TmaL1, PGO",
"MetricName": "tma_frontend_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period.",
"MetricExpr": "100 * ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )",
"MetricGroup": "Frontend, TmaL2",
"MetricName": "tma_fetch_latency_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
"MetricExpr": "100 * ( ICACHE.IFDATA_STALL / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "BigFoot, FetchLat, IcMiss",
"MetricName": "tma_icache_misses_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses.",
"MetricExpr": "100 * ( ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "BigFoot, FetchLat, MemoryTLB",
"MetricName": "tma_itlb_misses_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers. Branch Resteers estimates the Frontend delay in fetching operations from corrected path; following all sorts of miss-predicted branches. For example; branchy code with lots of miss-predictions might get categorized under Branch Resteers. Note the value of this node may overlap with its siblings.",
"MetricExpr": "100 * ( ( 12 ) * ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT + BACLEARS.ANY ) / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "FetchLat",
"MetricName": "tma_branch_resteers_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines. The DSB (decoded i-cache) is a Uop Cache where the front-end directly delivers Uops (micro operations) avoiding heavy x86 decoding. The DSB pipeline has shorter latency and delivers higher bandwidth than the MITE (legacy instruction decode pipeline). Switching between the two pipelines can cause penalties hence this metric measures the exposed penalty.",
"MetricExpr": "100 * ( DSB2MITE_SWITCHES.PENALTY_CYCLES / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "DSBmiss, FetchLat",
"MetricName": "tma_dsb_switches_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs). Using proper compiler flags or Intel Compiler by default will certainly avoid this. #Link: Optimization Guide about LCP BKMs.",
"MetricExpr": "100 * ( ILD_STALL.LCP / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "FetchLat",
"MetricName": "tma_lcp_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals.",
"MetricExpr": "100 * ( ( 2 ) * IDQ.MS_SWITCHES / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "FetchLat, MicroSeq",
"MetricName": "tma_ms_switches_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend.",
"MetricExpr": "100 * ( ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) - ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) )",
"MetricGroup": "FetchBW, Frontend, TmaL2",
"MetricName": "tma_fetch_bandwidth_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
"MetricExpr": "100 * ( ( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) / 2 )",
"MetricGroup": "DSBmiss, FetchBW",
"MetricName": "tma_mite_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline. For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
"MetricExpr": "100 * ( ( IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) / 2 )",
"MetricGroup": "DSB, FetchBW",
"MetricName": "tma_dsb_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. This includes slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
"MetricExpr": "100 * ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )",
"MetricGroup": "TmaL1",
"MetricName": "tma_bad_speculation_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path.",
"MetricExpr": "100 * ( ( BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT ) ) * ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) )",
"MetricGroup": "BadSpec, BrMispredicts, TmaL2",
"MetricName": "tma_branch_mispredicts_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls when the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes.",
"MetricExpr": "100 * ( ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) - ( ( BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT ) ) * ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) )",
"MetricGroup": "BadSpec, MachineClears, TmaL2",
"MetricName": "tma_machine_clears_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
"MetricExpr": "100 * ( 1 - ( ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) )",
"MetricGroup": "TmaL1",
"MetricName": "tma_backend_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).",
"MetricExpr": "100 * ( ( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.STALLS_LDM_PENDING ) ) + RESOURCE_STALLS.SB ) / ( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) ) / 2 - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) if #SMT_on else ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) ) ) * ( 1 - ( ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) ) )",
"MetricGroup": "Backend, TmaL2",
"MetricName": "tma_memory_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache. The L1 data cache typically has the shortest latency. However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads that miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache.",
"MetricExpr": "100 * ( max( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.STALLS_LDM_PENDING ) ) - CYCLE_ACTIVITY.STALLS_L1D_PENDING ) / ( CPU_CLK_UNHALTED.THREAD ) , 0 ) )",
"MetricGroup": "CacheMisses, MemoryBound, TmaL3mem",
"MetricName": "tma_l1_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads. Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance.",
"MetricExpr": "100 * ( ( CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING ) / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "CacheMisses, MemoryBound, TmaL3mem",
"MetricName": "tma_l2_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to load accesses to L3 cache or contended with a sibling Core. Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance.",
"MetricExpr": "100 * ( ( MEM_LOAD_UOPS_RETIRED.L3_HIT / ( MEM_LOAD_UOPS_RETIRED.L3_HIT + ( 7 ) * MEM_LOAD_UOPS_RETIRED.L3_MISS ) ) * CYCLE_ACTIVITY.STALLS_L2_PENDING / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "CacheMisses, MemoryBound, TmaL3mem",
"MetricName": "tma_l3_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads. Better caching can improve the latency and increase performance.",
"MetricExpr": "100 * ( min( ( ( 1 - ( MEM_LOAD_UOPS_RETIRED.L3_HIT / ( MEM_LOAD_UOPS_RETIRED.L3_HIT + ( 7 ) * MEM_LOAD_UOPS_RETIRED.L3_MISS ) ) ) * CYCLE_ACTIVITY.STALLS_L2_PENDING / ( CPU_CLK_UNHALTED.THREAD ) ) , ( 1 ) ) )",
"MetricGroup": "MemoryBound, TmaL3mem",
"MetricName": "tma_dram_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO stores issue a read-for-ownership request before the write. Even though store accesses do not typically stall out-of-order CPUs; there are few cases where stores can lead to actual stalls. This metric will be flagged should RFO stores be a bottleneck.",
"MetricExpr": "100 * ( RESOURCE_STALLS.SB / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "MemoryBound, TmaL3mem",
"MetricName": "tma_store_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots where Core non-memory issues were a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).",
"MetricExpr": "100 * ( ( 1 - ( ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) ) - ( ( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.STALLS_LDM_PENDING ) ) + RESOURCE_STALLS.SB ) / ( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) ) / 2 - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) if #SMT_on else ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) ) ) * ( 1 - ( ( IDQ_UOPS_NOT_DELIVERED.CORE / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_ISSUED.ANY - ( UOPS_RETIRED.RETIRE_SLOTS ) + ( 4 ) * ( ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) if #SMT_on else INT_MISC.RECOVERY_CYCLES ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) + ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) ) ) )",
"MetricGroup": "Backend, TmaL2, Compute",
"MetricName": "tma_core_bound_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of cycles where the Divider unit was active. Divide and square root instructions are performed by the Divider unit and can take considerably longer latency than integer or Floating Point addition; subtraction; or multiplication.",
"MetricExpr": "100 * ( 10 * ARITH.DIVIDER_UOPS / ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) )",
"MetricGroup": "",
"MetricName": "tma_divider_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related). Two distinct categories can be attributed into this metric: (1) heavy data-dependency among contiguous instructions would manifest in this metric - such cases are often referred to as low Instruction Level Parallelism (ILP). (2) Contention on some hardware execution unit other than Divider. For example; when there are too many multiply operations.",
"MetricExpr": "100 * ( ( ( ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) ) / 2 - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) if #SMT_on else ( ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.CYCLES_NO_EXECUTE ) ) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x1@ - ( cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x3@ if ( ( INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD ) ) > 1.8 ) else cpu@UOPS_EXECUTED.CORE\\,cmask\\=0x2@ ) - ( RS_EVENTS.EMPTY_CYCLES if ( ( ( 4 ) * ( min( CPU_CLK_UNHALTED.THREAD , IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE ) ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) > 0.1 ) else 0 ) + RESOURCE_STALLS.SB ) ) - RESOURCE_STALLS.SB - ( min( CPU_CLK_UNHALTED.THREAD , CYCLE_ACTIVITY.STALLS_LDM_PENDING ) ) ) / ( CPU_CLK_UNHALTED.THREAD ) )",
"MetricGroup": "PortsUtil",
"MetricName": "tma_ports_utilization_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessarily mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided.",
"MetricExpr": "100 * ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )",
"MetricGroup": "TmaL1",
"MetricName": "tma_retiring_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved.",
"MetricExpr": "100 * ( ( ( UOPS_RETIRED.RETIRE_SLOTS ) / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) - ( ( ( ( UOPS_RETIRED.RETIRE_SLOTS ) / UOPS_ISSUED.ANY ) * IDQ.MS_UOPS / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) ) )",
"MetricGroup": "Retire, TmaL2",
"MetricName": "tma_light_operations_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or microcoded sequences. This highly-correlates with the uop length of these instructions/sequences.",
"MetricExpr": "100 * ( ( ( ( UOPS_RETIRED.RETIRE_SLOTS ) / UOPS_ISSUED.ANY ) * IDQ.MS_UOPS / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) ) )",
"MetricGroup": "Retire, TmaL2",
"MetricName": "tma_heavy_operations_percent",
"ScaleUnit": "1%"
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit. The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided.",
"MetricExpr": "100 * ( ( ( UOPS_RETIRED.RETIRE_SLOTS ) / UOPS_ISSUED.ANY ) * IDQ.MS_UOPS / ( ( 4 ) * ( ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else ( CPU_CLK_UNHALTED.THREAD ) ) ) )",
"MetricGroup": "MicroSeq",
"MetricName": "tma_microcode_sequencer_percent",
"ScaleUnit": "1%"
}
]

View File

@ -963,6 +963,15 @@
"UMask": "0x2",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -973,16 +982,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe writes (partial cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x180,filter_tid=0x3e",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -994,17 +993,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "L2 demand and L2 prefetch code references to LLC",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x181",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -1016,17 +1004,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x18c",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -1038,17 +1015,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "Streaming stores (partial cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x18d",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -1060,17 +1026,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe read current",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x19e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
"Counter": "0,1,2,3",
@ -1082,17 +1037,6 @@
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write references (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "filter_opc=0x1c8,filter_tid=0x3e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x1",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; Evictions",
"Counter": "0,1,2,3",
@ -1121,21 +1065,19 @@
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
"BriefDescription": "TOR Inserts; Miss Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.DATA_READ",
"Filter": "filter_opc=0x182",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches",
"BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"EventName": "LLC_MISSES.DATA_READ",
"Filter": "filter_opc=0x182",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
@ -1153,17 +1095,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x187",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1175,17 +1106,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x187,filter_nc=1",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1197,17 +1117,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "MMIO writes",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x18f,filter_nc=1",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1219,17 +1128,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for RFO",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x190",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1241,17 +1139,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for code reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x191",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1263,17 +1150,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC prefetch misses for data reads",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x192",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1285,17 +1161,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "LLC misses for PCIe read current",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x19e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1307,17 +1172,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x1c8",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
"Counter": "0,1,2,3",
@ -1329,17 +1183,6 @@
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "PCIe write misses (full cache line)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "filter_opc=0x1c8,filter_tid=0x3e",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "CBO"
},
{
"BriefDescription": "TOR Inserts; NID and Opcode Matched",
"Counter": "0,1,2,3",

View File

@ -486,6 +486,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHz]",
"MetricExpr": "cha_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -6,5 +6,230 @@
"EventName": "UNC_P_CLOCKTICKS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_CORE_TRANSITION_CYCLES",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x60",
"EventName": "UNC_P_CORE_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_DEMOTIONS",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x30",
"EventName": "UNC_P_DEMOTIONS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 0 Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x75",
"EventName": "UNC_P_FIVR_PS_PS0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 1 Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x76",
"EventName": "UNC_P_FIVR_PS_PS1_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 2 Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x77",
"EventName": "UNC_P_FIVR_PS_PS2_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 3 Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x78",
"EventName": "UNC_P_FIVR_PS_PS3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "AVX256 Frequency Clipping",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x49",
"EventName": "UNC_P_FREQ_CLIP_AVX256",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "AVX512 Frequency Clipping",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x4a",
"EventName": "UNC_P_FREQ_CLIP_AVX512",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Thermal Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x04",
"EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Power Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x05",
"EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "IO P Limit Strongest Lower Limit Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x73",
"EventName": "UNC_P_FREQ_MIN_IO_P_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Cycles spent changing Frequency",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x74",
"EventName": "UNC_P_FREQ_TRANS_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Memory Phase Shedding Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x2F",
"EventName": "UNC_P_MEMORY_PHASE_SHEDDING_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C0",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x2A",
"EventName": "UNC_P_PKG_RESIDENCY_C0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C2E",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x2B",
"EventName": "UNC_P_PKG_RESIDENCY_C2E_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C3",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x2C",
"EventName": "UNC_P_PKG_RESIDENCY_C3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C6",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x2D",
"EventName": "UNC_P_PKG_RESIDENCY_C6_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_PMAX_THROTTLED_CYCLES",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x06",
"EventName": "UNC_P_PMAX_THROTTLED_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "External Prochot",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x0A",
"EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Internal Prochot",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x09",
"EventName": "UNC_P_PROCHOT_INTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Total Core C State Transition Cycles",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x72",
"EventName": "UNC_P_TOTAL_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "VR Hot",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x42",
"EventName": "UNC_P_VR_HOT_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State : C0 and C1",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State : C3",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State : C6 and C7",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
"PerPkg": "1",
"Unit": "PCU"
}
]

View File

@ -354,6 +354,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHz]",
"MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

View File

@ -20,7 +20,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.ANY",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:17] bits correspond to [M'FMESI] state.; Filters for any transaction originating from the IPQ or IRQ. This does not include lookups originating from the ISMQ.",
"UMask": "0x11",
@ -31,7 +30,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.DATA_READ",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:17] bits correspond to [M'FMESI] state.; Read transactions",
"UMask": "0x3",
@ -42,7 +40,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.NID",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:17] bits correspond to [M'FMESI] state.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x41",
@ -53,7 +50,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:17] bits correspond to [M'FMESI] state.; Filters for only snoop requests coming from the remote socket(s) through the IPQ.",
"UMask": "0x9",
@ -64,7 +60,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.WRITE",
"Filter": "CBoFilter0[23:17]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:17] bits correspond to [M'FMESI] state.; Writeback transactions from L2 to the LLC. This includes all write transactions -- both Cacheable and UC.",
"UMask": "0x5",
@ -105,7 +100,6 @@
"Counter": "0,1",
"EventCode": "0x37",
"EventName": "UNC_C_LLC_VICTIMS.NID",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.; Qualify one of the other subevents by the Target NID. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x40",
@ -1034,7 +1028,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All transactions, satisfied by an opcode, inserted into the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x21",
@ -1055,7 +1048,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions, satisfied by an opcode, inserted into the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x23",
@ -1066,7 +1058,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions inserted into the TOR that match an opcode.",
"UMask": "0x3",
@ -1087,7 +1078,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions, satisfied by an opcode, inserted into the TOR that are satisfied by remote caches or remote memory.",
"UMask": "0x83",
@ -1098,7 +1088,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All NID matched (matches an RTID destination) transactions inserted into the TOR. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x48",
@ -1109,7 +1098,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_EVICTION",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; NID matched eviction transactions inserted into the TOR.",
"UMask": "0x44",
@ -1120,7 +1108,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All NID matched miss requests that were inserted into the TOR.",
"UMask": "0x4A",
@ -1131,7 +1118,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Miss transactions inserted into the TOR that match a NID and an opcode.",
"UMask": "0x43",
@ -1142,7 +1128,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Transactions inserted into the TOR that match a NID and an opcode.",
"UMask": "0x41",
@ -1153,7 +1138,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_WB",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; NID matched write transactions inserted into the TOR.",
"UMask": "0x50",
@ -1164,7 +1148,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; Transactions inserted into the TOR that match an opcode (matched by Cn_MSR_PMON_BOX_FILTER.opc)",
"UMask": "0x1",
@ -1185,7 +1168,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).; All transactions, satisfied by an opcode, inserted into the TOR that are satisfied by remote caches or remote memory.",
"UMask": "0x81",
@ -1232,7 +1214,6 @@
"BriefDescription": "TOR Occupancy; Local Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding transactions, satisfied by an opcode, in the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x21",
@ -1260,7 +1241,6 @@
"BriefDescription": "TOR Occupancy; Misses to Local Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss transactions, satisfied by an opcode, in the TOR that are satisfied by locally HOMed memory.",
"UMask": "0x23",
@ -1270,7 +1250,6 @@
"BriefDescription": "TOR Occupancy; Miss Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries for miss transactions that match an opcode. This generally means that the request was sent to memory or MMIO.",
"UMask": "0x3",
@ -1289,7 +1268,6 @@
"BriefDescription": "TOR Occupancy; Misses to Remote Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss transactions, satisfied by an opcode, in the TOR that are satisfied by remote caches or remote memory.",
"UMask": "0x83",
@ -1299,7 +1277,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of NID matched outstanding requests in the TOR. The NID is programmed in Cn_MSR_PMON_BOX_FILTER.nid. In conjunction with STATE = I, it is possible to monitor misses to specific NIDs in the system.",
"UMask": "0x48",
@ -1309,7 +1286,6 @@
"BriefDescription": "TOR Occupancy; NID Matched Evictions",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_EVICTION",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding NID matched eviction transactions in the TOR.",
"UMask": "0x44",
@ -1319,7 +1295,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_ALL",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss requests in the TOR that match a NID.",
"UMask": "0x4A",
@ -1329,7 +1304,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched Miss",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding Miss requests in the TOR that match a NID and an opcode.",
"UMask": "0x43",
@ -1339,7 +1313,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_OPCODE",
"Filter": "CBoFilter1[28:20], CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries that match a NID and an opcode.",
"UMask": "0x41",
@ -1349,7 +1322,6 @@
"BriefDescription": "TOR Occupancy; NID Matched Writebacks",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_WB",
"Filter": "CBoFilter1[15:0]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); NID matched write transactions in the TOR.",
"UMask": "0x50",
@ -1359,7 +1331,6 @@
"BriefDescription": "TOR Occupancy; Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); TOR entries that match an opcode (matched by Cn_MSR_PMON_BOX_FILTER.opc).",
"UMask": "0x1",
@ -1378,7 +1349,6 @@
"BriefDescription": "TOR Occupancy; Remote Memory - Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.REMOTE_OPCODE",
"Filter": "CBoFilter1[28:20]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select MISS_OPC_MATCH and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182); Number of outstanding transactions, satisfied by an opcode, in the TOR that are satisfied by remote caches or remote memory.",
"UMask": "0x81",
@ -1520,66 +1490,6 @@
"UMask": "0x8",
"Unit": "CBO"
},
{
"BriefDescription": "QPI Address/Opcode Match; AD Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.AD",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x4",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Address",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.ADDR",
"Filter": "HA_AddrMatch0[31:6], HA_AddrMatch1[13:0]",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; AK Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.AK",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x10",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; BL Opcodes",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.BL",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x8",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Address & Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.FILT",
"Filter": "HA_AddrMatch0[31:6], HA_AddrMatch1[13:0], HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x3",
"Unit": "HA"
},
{
"BriefDescription": "QPI Address/Opcode Match; Opcode",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.OPC",
"Filter": "HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "HA"
},
{
"BriefDescription": "BT Bypass",
"Counter": "0,1,2,3",

View File

@ -14,7 +14,6 @@
"EventCode": "0x38",
"EventName": "UNC_Q_CTO_COUNT",
"ExtSel": "1",
"Filter": "QPIMask0[17:0],QPIMatch0[17:0],QPIMask1[19:16],QPIMatch1[19:16]",
"PerPkg": "1",
"PublicDescription": "Counts the number of CTO (cluster trigger outs) events that were asserted across the two slots. If both slots trigger in a given cycle, the event will increment by 2. You can use edge detect to count the number of cases when both events triggered.",
"Unit": "QPI LL"

View File

@ -247,17 +247,6 @@
"UMask": "0x2",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count; Select Source",
"Counter": "0,1",
"EventCode": "0x15",
"EventName": "UNC_I_TRANSACTIONS.ORDERINGQ",
"Filter": "IRPFilter[4:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Tracks only those requests that come from the port specified in the IRP_PmonFilter.OrderingQ register. This register allows one to select one specific queue. It is not possible to monitor multiple queues at a time. If this bit is not set, then requests from all sources will be counted.",
"UMask": "0x8",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count: Read Prefetches",
"Counter": "0,1",
@ -2274,7 +2263,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x1",
@ -2295,7 +2283,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.U2C_ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x4",

View File

@ -297,7 +297,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x1e",
"EventName": "UNC_P_DEMOTIONS_CORE0",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -307,7 +306,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x1f",
"EventName": "UNC_P_DEMOTIONS_CORE1",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -317,7 +315,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x42",
"EventName": "UNC_P_DEMOTIONS_CORE10",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -327,7 +324,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "UNC_P_DEMOTIONS_CORE11",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -337,7 +333,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "UNC_P_DEMOTIONS_CORE12",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -347,7 +342,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x45",
"EventName": "UNC_P_DEMOTIONS_CORE13",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -357,7 +351,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x46",
"EventName": "UNC_P_DEMOTIONS_CORE14",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -367,7 +360,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_P_DEMOTIONS_CORE2",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -377,7 +369,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "UNC_P_DEMOTIONS_CORE3",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -387,7 +378,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x22",
"EventName": "UNC_P_DEMOTIONS_CORE4",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -397,7 +387,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "UNC_P_DEMOTIONS_CORE5",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -407,7 +396,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "UNC_P_DEMOTIONS_CORE6",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -417,7 +405,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "UNC_P_DEMOTIONS_CORE7",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -427,7 +414,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "UNC_P_DEMOTIONS_CORE8",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -437,7 +423,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x41",
"EventName": "UNC_P_DEMOTIONS_CORE9",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -447,7 +432,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_BAND0_CYCLES",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -457,7 +441,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_BAND1_CYCLES",
"Filter": "PCUFilter[15:8]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -467,7 +450,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_BAND2_CYCLES",
"Filter": "PCUFilter[23:16]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -477,7 +459,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_BAND3_CYCLES",
"Filter": "PCUFilter[31:24]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"

View File

@ -208,6 +208,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cbox_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

View File

@ -20,7 +20,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.DATA_READ",
"Filter": "CBoFilter[22:18]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.",
"UMask": "0x3",
@ -31,7 +30,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.NID",
"Filter": "CBoFilter[22:18], CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.",
"UMask": "0x41",
@ -42,7 +40,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.REMOTE_SNOOP",
"Filter": "CBoFilter[22:18]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.",
"UMask": "0x9",
@ -53,7 +50,6 @@
"Counter": "0,1",
"EventCode": "0x34",
"EventName": "UNC_C_LLC_LOOKUP.WRITE",
"Filter": "CBoFilter[22:18]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2. This has numerous filters available. Note the non-standard filtering equation. This event will count requests that lookup the cache multiple times with multiple increments. One must ALWAYS set filter mask bit 0 and select a state or states to match. Otherwise, the event will count nothing. CBoGlCtrl[22:18] bits correspond to [FMESI] state.",
"UMask": "0x5",
@ -94,7 +90,6 @@
"Counter": "0,1",
"EventCode": "0x37",
"EventName": "UNC_C_LLC_VICTIMS.NID",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.",
"UMask": "0x40",
@ -613,7 +608,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.MISS_OPCODE",
"Filter": "CBoFilter[31:23]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x3",
@ -624,7 +618,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_ALL",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x48",
@ -635,7 +628,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_EVICTION",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x44",
@ -646,7 +638,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_ALL",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x4a",
@ -657,7 +648,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_MISS_OPCODE",
"Filter": "CBoFilter[31:23], CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x43",
@ -668,7 +658,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_OPCODE",
"Filter": "CBoFilter[31:23], CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x41",
@ -679,7 +668,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.NID_WB",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x50",
@ -690,7 +678,6 @@
"Counter": "0,1",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.OPCODE",
"Filter": "CBoFilter[31:23]",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182).",
"UMask": "0x1",
@ -737,7 +724,6 @@
"BriefDescription": "TOR Occupancy; Miss Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE",
"Filter": "CBoFilter[31:23]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x3",
@ -747,7 +733,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_ALL",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x48",
@ -757,7 +742,6 @@
"BriefDescription": "TOR Occupancy; NID Matched Evictions",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_EVICTION",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x44",
@ -767,7 +751,6 @@
"BriefDescription": "TOR Occupancy; NID Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_ALL",
"Filter": "CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x4a",
@ -777,7 +760,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched Miss",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE",
"Filter": "CBoFilter[31:23], CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x43",
@ -787,7 +769,6 @@
"BriefDescription": "TOR Occupancy; NID and Opcode Matched",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.NID_OPCODE",
"Filter": "CBoFilter[31:23], CBoFilter[17:10]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x41",
@ -797,7 +778,6 @@
"BriefDescription": "TOR Occupancy; Opcode Match",
"EventCode": "0x36",
"EventName": "UNC_C_TOR_OCCUPANCY.OPCODE",
"Filter": "CBoFilter[31:23]",
"PerPkg": "1",
"PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. There are a number of subevent 'filters' but only a subset of the subevent combinations are valid. Subevents that require an opcode or NID match require the Cn_MSR_PMON_BOX_FILTER.{opc, nid} field to be set. If, for example, one wanted to count DRD Local Misses, one should select 'MISS_OPC_MATCH' and set Cn_MSR_PMON_BOX_FILTER.opc to DRD (0x182)",
"UMask": "0x1",
@ -893,16 +873,6 @@
"UMask": "0x4",
"Unit": "CBO"
},
{
"BriefDescription": "QPI Address/Opcode Match; Address & Opcode Match",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_H_ADDR_OPC_MATCH.FILT",
"Filter": "HA_AddrMatch0[31:6], HA_AddrMatch1[13:0], HA_OpcodeMatch[5:0]",
"PerPkg": "1",
"UMask": "0x3",
"Unit": "HA"
},
{
"BriefDescription": "HA to iMC Bypass; Not Taken",
"Counter": "0,1,2,3",

View File

@ -247,17 +247,6 @@
"UMask": "0x2",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count; Select Source",
"Counter": "0,1",
"EventCode": "0x15",
"EventName": "UNC_I_TRANSACTIONS.ORDERINGQ",
"Filter": "IRPFilter[4:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of 'Inbound' transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.",
"UMask": "0x8",
"Unit": "IRP"
},
{
"BriefDescription": "Inbound Transaction Count; Read Prefetches",
"Counter": "0,1",
@ -1378,7 +1367,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x1",
@ -1399,7 +1387,6 @@
"Counter": "0,1",
"EventCode": "0x41",
"EventName": "UNC_U_FILTER_MATCH.U2C_ENABLE",
"Filter": "UBoxFilter[3:0]",
"PerPkg": "1",
"PublicDescription": "Filter match per thread (w/ or w/o Filter Enable). Specify the thread to filter on using NCUPMONCTRLGLCTR.ThreadID.",
"UMask": "0x4",

View File

@ -92,7 +92,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x1e",
"EventName": "UNC_P_DEMOTIONS_CORE0",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -102,7 +101,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x1f",
"EventName": "UNC_P_DEMOTIONS_CORE1",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -121,7 +119,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "UNC_P_DEMOTIONS_CORE3",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -131,7 +128,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x22",
"EventName": "UNC_P_DEMOTIONS_CORE4",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -141,7 +137,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "UNC_P_DEMOTIONS_CORE5",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -151,7 +146,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "UNC_P_DEMOTIONS_CORE6",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable core had a C-state demotion",
"Unit": "PCU"
@ -161,7 +155,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "UNC_P_DEMOTIONS_CORE7",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of times when a configurable cores had a C-state demotion",
"Unit": "PCU"
@ -171,7 +164,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xb",
"EventName": "UNC_P_FREQ_BAND0_CYCLES",
"Filter": "PCUFilter[7:0]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -181,7 +173,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xc",
"EventName": "UNC_P_FREQ_BAND1_CYCLES",
"Filter": "PCUFilter[15:8]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -191,7 +182,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xd",
"EventName": "UNC_P_FREQ_BAND2_CYCLES",
"Filter": "PCUFilter[23:16]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"
@ -201,7 +191,6 @@
"Counter": "0,1,2,3",
"EventCode": "0xe",
"EventName": "UNC_P_FREQ_BAND3_CYCLES",
"Filter": "PCUFilter[31:24]",
"PerPkg": "1",
"PublicDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. One can use all four counters with this event, so it is possible to track up to 4 configurable bands. One can use edge detect in conjunction with this event to track the number of times that we transitioned into a frequency greater than or equal to the configurable frequency. One can also use inversion to track cycles when we were less than the configured frequency.",
"Unit": "PCU"

View File

@ -1,4 +1,67 @@
[
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IPQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.IPQ_HIT",
"PerPkg": "1",
"UMask": "0x18",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IPQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.IPQ_MISS",
"PerPkg": "1",
"UMask": "0x28",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.IRQ_HIT",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IRQ ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.IRQ_MISS",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IRQ or PRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.LOC_ALL",
"PerPkg": "1",
"UMask": "0x37",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -PRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.PRQ_HIT",
"PerPkg": "1",
"UMask": "0x14",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -PRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_C_TOR_INSERTS.PRQ_MISS",
"PerPkg": "1",
"UMask": "0x24",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of read requests and streaming stores that hit in MCDRAM cache and the data in MCDRAM is clean with respect to DDR. This event is only valid in cache and hybrid memory mode.",
"Counter": "0,1,2,3",
@ -3497,6 +3560,156 @@
"UMask": "0x08",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -SF/LLC Evictions",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.EVICT",
"PerPkg": "1",
"UMask": "0x32",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -Hit (Not a Miss)",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.HIT",
"PerPkg": "1",
"UMask": "0x1F",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IPQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.IPQ",
"PerPkg": "1",
"UMask": "0x38",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -IRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.IRQ",
"PerPkg": "1",
"UMask": "0x31",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -Miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.MISS",
"PerPkg": "1",
"UMask": "0x2F",
"Unit": "CHA"
},
{
"BriefDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent -PRQ",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_H_TOR_INSERTS.PRQ",
"PerPkg": "1",
"UMask": "0x34",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -SF/LLC Evictions",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.EVICT",
"PerPkg": "1",
"UMask": "0x32",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -Hit (Not a Miss)",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.HIT",
"PerPkg": "1",
"UMask": "0x1F",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IPQ",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IPQ",
"PerPkg": "1",
"UMask": "0x38",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IPQ hit",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IPQ_HIT",
"PerPkg": "1",
"UMask": "0x18",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IPQ miss",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IPQ_MISS",
"PerPkg": "1",
"UMask": "0x28",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IRQ or PRQ",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IRQ",
"PerPkg": "1",
"UMask": "0x31",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IRQ or PRQ hit",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IRQ_HIT",
"PerPkg": "1",
"UMask": "0x11",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -IRQ or PRQ miss",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.IRQ_MISS",
"PerPkg": "1",
"UMask": "0x21",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -Miss",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.MISS",
"PerPkg": "1",
"UMask": "0x2F",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -PRQ",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.PRQ",
"PerPkg": "1",
"UMask": "0x34",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -PRQ hit",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.PRQ_HIT",
"PerPkg": "1",
"UMask": "0x14",
"Unit": "CHA"
},
{
"BriefDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent -PRQ miss",
"EventCode": "0x36",
"EventName": "UNC_H_TOR_OCCUPANCY.PRQ_MISS",
"PerPkg": "1",
"UMask": "0x24",
"Unit": "CHA"
},
{
"BriefDescription": "Uncore Clocks",
"Counter": "0,1,2,3",

View File

@ -491,6 +491,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "uncore_cha_0@event\\=0x1@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

View File

@ -697,6 +697,12 @@
"MetricGroup": "SoC",
"MetricName": "Socket_CLKS"
},
{
"BriefDescription": "Uncore frequency per die [GHZ]",
"MetricExpr": "cha_0@event\\=0x0@ / #num_dies / duration_time / 1000000000",
"MetricGroup": "SoC",
"MetricName": "UNCORE_FREQ"
},
{
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,201 @@
[
{
"BriefDescription": "pclk Cycles",
"Counter": "0,1,2,3",
"EventName": "UNC_P_CLOCKTICKS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_CORE_TRANSITION_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "UNC_P_CORE_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_DEMOTIONS",
"Counter": "0,1,2,3",
"EventCode": "0x30",
"EventName": "UNC_P_DEMOTIONS",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 0 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "UNC_P_FIVR_PS_PS0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 1 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UNC_P_FIVR_PS_PS1_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 2 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x77",
"EventName": "UNC_P_FIVR_PS_PS2_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Phase Shed 3 Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x78",
"EventName": "UNC_P_FIVR_PS_PS3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Thermal Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Power Strongest Upper Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x5",
"EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "IO P Limit Strongest Lower Limit Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x73",
"EventName": "UNC_P_FREQ_MIN_IO_P_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Cycles spent changing Frequency",
"Counter": "0,1,2,3",
"EventCode": "0x74",
"EventName": "UNC_P_FREQ_TRANS_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_MCP_PROCHOT_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x6",
"EventName": "UNC_P_MCP_PROCHOT_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Memory Phase Shedding Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x2F",
"EventName": "UNC_P_MEMORY_PHASE_SHEDDING_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C0",
"Counter": "0,1,2,3",
"EventCode": "0x2A",
"EventName": "UNC_P_PKG_RESIDENCY_C0_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C2E",
"Counter": "0,1,2,3",
"EventCode": "0x2B",
"EventName": "UNC_P_PKG_RESIDENCY_C2E_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C3",
"Counter": "0,1,2,3",
"EventCode": "0x2C",
"EventName": "UNC_P_PKG_RESIDENCY_C3_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Package C State Residency - C6",
"Counter": "0,1,2,3",
"EventCode": "0x2D",
"EventName": "UNC_P_PKG_RESIDENCY_C6_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "UNC_P_PMAX_THROTTLED_CYCLES",
"Counter": "0,1,2,3",
"EventCode": "0x7",
"EventName": "UNC_P_PMAX_THROTTLED_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C0 and C1",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C3",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Number of cores in C-State; C6 and C7",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "External Prochot",
"Counter": "0,1,2,3",
"EventCode": "0xA",
"EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Internal Prochot",
"Counter": "0,1,2,3",
"EventCode": "0x9",
"EventName": "UNC_P_PROCHOT_INTERNAL_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "Total Core C State Transition Cycles",
"Counter": "0,1,2,3",
"EventCode": "0x72",
"EventName": "UNC_P_TOTAL_TRANSITION_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
},
{
"BriefDescription": "VR Hot",
"Counter": "0,1,2,3",
"EventCode": "0x42",
"EventName": "UNC_P_VR_HOT_CYCLES",
"PerPkg": "1",
"Unit": "PCU"
}
]

View File

@ -132,23 +132,22 @@
"Unit": "CHA"
},
{
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
"BriefDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "LLC_MISSES.UNCACHEABLE",
"Filter": "config1=0x40e33",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"PerPkg": "1",
"UMask": "0xC001FE01",
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) ",
"BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"EventName": "LLC_MISSES.UNCACHEABLE",
"Filter": "config1=0x40e33",
"PerPkg": "1",
"UMask": "0xC001FE01",
@ -167,18 +166,6 @@
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "MMIO reads",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"Filter": "config1=0x40040e33",
"PerPkg": "1",
"UMask": "0xC001FE01",
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
@ -191,18 +178,6 @@
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "MMIO writes",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"Filter": "config1=0x40041e33",
"PerPkg": "1",
"UMask": "0xC001FE01",
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
@ -216,19 +191,6 @@
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (full cache line)",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"Filter": "config1=0x41833",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0xC001FE01",
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
@ -242,19 +204,6 @@
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "Streaming stores (partial cache line)",
"Counter": "0,1,2,3",
"CounterType": "PGMABLE",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
"Filter": "config1=0x41a33",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0xC001FE01",
"UMaskExt": "0xC001FE",
"Unit": "CHA"
},
{
"BriefDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC",
"Counter": "0,1,2,3",
@ -829,31 +778,12 @@
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
"Counter": "0,1",
"CounterType": "PGMABLE",
"EventCode": "0x83",
"EventName": "LLC_MISSES.PCIE_WRITE",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth writing at IIO",
"BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
"Counter": "0,1",
"CounterType": "PGMABLE",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
@ -900,31 +830,28 @@
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
"BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
"Counter": "0,1",
"CounterType": "PGMABLE",
"EventCode": "0x83",
"EventName": "LLC_MISSES.PCIE_READ",
"EventName": "LLC_MISSES.PCIE_WRITE",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"MetricName": "LLC_MISSES.PCIE_READ",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
"MetricName": "LLC_MISSES.PCIE_WRITE",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"UMask": "0x01",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO",
"BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
"Counter": "0,1",
"CounterType": "PGMABLE",
"EventCode": "0x83",
"EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
@ -970,6 +897,22 @@
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
"Counter": "0,1",
"CounterType": "PGMABLE",
"EventCode": "0x83",
"EventName": "LLC_MISSES.PCIE_READ",
"FCMask": "0x07",
"Filter": "ch_mask=0x1f",
"MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
"MetricName": "LLC_MISSES.PCIE_READ",
"PerPkg": "1",
"PortMask": "0x01",
"ScaleUnit": "4Bytes",
"UMask": "0x04",
"Unit": "IIO"
},
{
"BriefDescription": "Data requested of the CPU : CmpD - device sending completion to CPU request",
"Counter": "0,1",

View File

@ -6,6 +6,10 @@
* The test cpu/soc is provided for testing.
*/
#include "pmu-events/pmu-events.h"
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>
static const struct pmu_event pme_test_soc_cpu[] = {
{
@ -101,6 +105,70 @@ static const struct pmu_event pme_test_soc_cpu[] = {
.desc = "L2 BTB Correction",
.topic = "branch",
},
{
.metric_expr = "1 / IPC",
.metric_name = "CPI",
},
{
.metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
.metric_name = "IPC",
.metric_group = "group1",
},
{
.metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
"( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
.metric_name = "Frontend_Bound_SMT",
},
{
.metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
.metric_name = "dcache_miss_cpi",
},
{
.metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
.metric_name = "icache_miss_cycles",
},
{
.metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
.metric_name = "cache_miss_cycles",
.metric_group = "group1",
},
{
.metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
.metric_name = "DCache_L2_All_Hits",
},
{
.metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
"l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
.metric_name = "DCache_L2_All_Miss",
},
{
.metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss",
.metric_name = "DCache_L2_All",
},
{
.metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
.metric_name = "DCache_L2_Hits",
},
{
.metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
.metric_name = "DCache_L2_Misses",
},
{
.metric_expr = "ipc + M2",
.metric_name = "M1",
},
{
.metric_expr = "ipc + M1",
.metric_name = "M2",
},
{
.metric_expr = "1/M3",
.metric_name = "M3",
},
{
.metric_expr = "64 * l1d.replacement / 1000000000 / duration_time",
.metric_name = "L1D_Cache_Fill_BW",
},
{
.name = 0,
.event = 0,
@ -108,18 +176,39 @@ static const struct pmu_event pme_test_soc_cpu[] = {
},
};
const struct pmu_events_map pmu_events_map[] = {
/*
 * Struct used to make the PMU event table implementation opaque to callers.
 *
 * entries points at an array of struct pmu_event. The iterator
 * pmu_events_table_for_each_event() treats the first element whose name,
 * metric_group and metric_name are all unset as the end of the array.
 */
struct pmu_events_table {
const struct pmu_event *entries;
};
/*
 * Map a CPU to its table of PMU events. The CPU is identified by the
 * cpuid field, which is an arch-specific identifier for the CPU.
 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c.
 *
 * The cpuid can contain any character other than the comma.
 */
struct pmu_events_map {
const char *arch; /* compared with strcmp() by find_core_events_table() */
const char *cpuid; /* matched with strcmp_cpuid_str() during lookups */
const struct pmu_events_table table; /* events for this arch/cpuid pair */
};
/*
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
static const struct pmu_events_map pmu_events_map[] = {
{
.arch = "testarch",
.cpuid = "testcpu",
.version = "v1",
.type = "core",
.table = pme_test_soc_cpu,
.table = { pme_test_soc_cpu },
},
{
.arch = 0,
.cpuid = 0,
.version = 0,
.type = 0,
.table = 0,
.table = { 0 },
},
};
@ -147,12 +236,107 @@ static const struct pmu_event pme_test_soc_sys[] = {
},
};
const struct pmu_sys_events pmu_sys_event_tables[] = {
/*
 * Associates a name with an events table. find_sys_events_table() looks
 * entries up by comparing name with strcmp(), and the walkers over
 * pmu_sys_event_tables[] stop at the first entry whose name is unset.
 */
struct pmu_sys_events {
const char *name;
const struct pmu_events_table table;
};
static const struct pmu_sys_events pmu_sys_event_tables[] = {
{
.table = pme_test_soc_sys,
.table = { pme_test_soc_sys },
.name = "pme_test_soc_sys",
},
{
.table = 0
.table = { 0 }
},
};
/*
 * Invoke fn on every event in table, handing data through untouched.
 *
 * The walk ends at the first entry whose name, metric_group and metric_name
 * are all unset. A non-zero return from fn aborts the walk and is returned
 * to the caller; otherwise 0 is returned once the end marker is reached.
 */
int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
				    void *data)
{
	const struct pmu_event *event = table->entries;

	while (event->name || event->metric_group || event->metric_name) {
		int err = fn(event, table, data);

		if (err)
			return err;
		event++;
	}
	return 0;
}
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
{
const struct pmu_events_table *table = NULL;
char *cpuid = perf_pmu__getcpuid(pmu);
int i;
/* on some platforms which uses cpus map, cpuid can be NULL for
* PMUs other than CORE PMUs.
*/
if (!cpuid)
return NULL;
i = 0;
for (;;) {
const struct pmu_events_map *map = &pmu_events_map[i++];
if (!map->cpuid)
break;
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
table = &map->table;
break;
}
}
free(cpuid);
return table;
}
/*
 * Look up the events table for an (arch, cpuid) pair.
 *
 * An entry of pmu_events_map matches when its arch is equal to arch under
 * strcmp() and its cpuid matches cpuid under strcmp_cpuid_str(). Returns the
 * first matching table, or NULL if the end of the map is reached.
 */
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
	const struct pmu_events_map *map = pmu_events_map;

	while (map->arch) {
		if (strcmp(map->arch, arch) == 0 &&
		    strcmp_cpuid_str(map->cpuid, cpuid) == 0)
			return &map->table;
		map++;
	}
	return NULL;
}
/*
 * Run fn over every event of every table listed in pmu_events_map.
 *
 * Iteration stops as soon as a table walk reports a non-zero value, which is
 * then returned; 0 means every table was walked to completion.
 */
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
	const struct pmu_events_map *map;
	int err = 0;

	/* Check err first so nothing past a failing table is examined. */
	for (map = pmu_events_map; !err && map->arch; map++)
		err = pmu_events_table_for_each_event(&map->table, fn, data);

	return err;
}
/*
 * Return the table in pmu_sys_event_tables whose name equals name under
 * strcmp(), or NULL when the end of the array is reached without a match.
 */
const struct pmu_events_table *find_sys_events_table(const char *name)
{
	const struct pmu_sys_events *sys = pmu_sys_event_tables;

	for (; sys->name; sys++) {
		if (strcmp(sys->name, name) != 0)
			continue;
		return &sys->table;
	}
	return NULL;
}
/*
 * Run fn over every event of every table listed in pmu_sys_event_tables.
 *
 * The first non-zero value reported by a table walk ends the iteration and
 * is returned; otherwise 0 is returned after the last named table.
 */
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
{
	const struct pmu_sys_events *sys = pmu_sys_event_tables;

	while (sys->name) {
		int err = pmu_events_table_for_each_event(&sys->table, fn, data);

		if (err)
			return err;
		sys++;
	}
	return 0;
}

View File

@ -6,8 +6,8 @@ import csv
import json
import os
import sys
from typing import Callable
from typing import Sequence
from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
import collections
# Global command line arguments.
_args = None
@ -19,6 +19,21 @@ _sys_event_tables = []
_arch_std_events = {}
# Track whether an events table is currently being defined and needs closing.
_close_table = False
# Events to write out when the table is closed
_pending_events = []
# Global BigCString shared by all structures.
_bcs = None
# Order specific JsonEvent attributes will be visited.
_json_event_attributes = [
# cmp_sevent related attributes.
'name', 'pmu', 'topic', 'desc', 'metric_name', 'metric_group',
# Seems useful, put it early.
'event',
# Short things in alphabetical order.
'aggr_mode', 'compat', 'deprecated', 'perpkg', 'unit',
# Longer things (the last won't be iterated over during decompress).
'metric_constraint', 'metric_expr', 'long_desc'
]
def removesuffix(s: str, suffix: str) -> str:
@ -38,6 +53,107 @@ def file_name_to_table_name(parents: Sequence[str], dirname: str) -> str:
tblname += '_' + dirname
return tblname.replace('-', '_')
def c_len(s: str) -> int:
  r"""Return the length of s as a C string.

  This doesn't handle all escape characters properly. It first assumes
  all \ are for escaping, it then adjusts as it will have over counted
  \\. The code uses \000 rather than \0 as a terminator as an adjacent
  number would be folded into a string of \0 (ie. "\0" + "5" doesn't
  equal a terminator followed by the number 5 but the escape of
  \05). The code adjusts for \000 but not properly for all octal, hex
  or unicode values.

  Args:
    s: The body of a C string literal, with its escapes still spelled out
      as backslash sequences.

  Returns:
    The number of bytes the literal occupies once compiled, under the
    approximations described above.

  Raises:
    UnicodeEncodeError: if s cannot be encoded as UTF-8; the offending
      string is printed before the error is re-raised.
  """
  # NOTE: the docstring is raw so that the backslash sequences it discusses
  # are shown literally instead of being interpreted as escapes.
  try:
    utf = s.encode(encoding='utf-8', errors='strict')
  except UnicodeEncodeError:
    print(f'broken string {s}')
    raise
  # Every backslash is first assumed to introduce a one-byte escape.
  backslashes = utf.count(b'\\')
  # An escaped backslash was counted as two escapes; give one back.
  escaped_backslashes = utf.count(b'\\\\')
  # \000 is four source bytes for one output byte: the backslash is already
  # discounted above, so drop two more.
  nul_escapes = utf.count(b'\\000')
  return len(utf) - backslashes + escaped_backslashes - 2 * nul_escapes
class BigCString:
  """A class to hold many strings concatenated together.

  Generating a large number of stand-alone C strings creates a large
  number of relocations in position independent code. The BigCString
  is a helper for this case. It builds a single string which within it
  are all the other C strings (to avoid memory issues the string
  itself is held as a list of strings). The offsets within the big
  string are recorded and when stored to disk these don't need
  relocation. To reduce the size of the string further, identical
  strings are merged. If a longer string ends-with the same value as a
  shorter string, these entries are also merged.

  Usage: call add() for every string, then compute() once; afterwards
  big_string holds the pieces of the C source text and offsets maps each
  added string to its position inside the big string.
  """
  # Every distinct string passed to add().
  strings: Set[str]
  # Pieces of the generated C text; only set by compute().
  big_string: Sequence[str]
  # Offset of each added string within the big string; only set by compute().
  offsets: Dict[str, int]

  def __init__(self):
    self.strings = set()

  def add(self, s: str) -> None:
    """Called to add to the big string."""
    self.strings.add(s)

  def compute(self) -> None:
    """Called once all strings are added to compute the string and offsets."""

    folded_strings = {}
    # Determine if two strings can be folded, ie. let 1 string use the
    # end of another. First reverse all strings and sort them.
    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])

    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
    # for each string to see if there is a better candidate to fold it
    # into, in the example rather than using 'yz' we can use 'xyz' at
    # an offset of 1. We record which string can be folded into which
    # in folded_strings, we don't need to record the offset as it is
    # trivially computed from the string lengths.
    for pos, s in enumerate(sorted_reversed_strings):
      best_pos = pos
      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
        if sorted_reversed_strings[check_pos].startswith(s):
          best_pos = check_pos
        else:
          # Sorted order keeps all extensions of s contiguous.
          break
      if pos != best_pos:
        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]

    # Compute reverse mappings for debugging.
    fold_into_strings = collections.defaultdict(set)
    for key, val in folded_strings.items():
      if key != val:
        fold_into_strings[val].add(key)

    # big_string_offset is the current location within the C string
    # being appended to - comments, etc. don't count. big_string is
    # the string contents represented as a list. Strings are immutable
    # in Python and so appending to one causes memory issues, while
    # lists are mutable.
    big_string_offset = 0
    self.big_string = []
    self.offsets = {}

    # Emit all strings that aren't folded in a sorted manner.
    for s in sorted(self.strings):
      if s not in folded_strings:
        self.offsets[s] = big_string_offset
        self.big_string.append(f'/* offset={big_string_offset} */ "')
        self.big_string.append(s)
        self.big_string.append('"')
        if s in fold_into_strings:
          # Sort the set so the generated comment is identical from run
          # to run regardless of string hash randomization.
          self.big_string.append(' /* also: ' + ', '.join(sorted(fold_into_strings[s])) + ' */')
        self.big_string.append('\n')
        big_string_offset += c_len(s)

    # Compute the offsets of the folded strings.
    for s in folded_strings.keys():
      assert s not in self.offsets
      folded_s = folded_strings[s]
      # A folded string shares the tail of the string it was folded into.
      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
_bcs = BigCString()
class JsonEvent:
"""Representation of an event loaded from a json file dictionary."""
@ -57,7 +173,7 @@ class JsonEvent:
'. '), '.').replace('\n', '\\n').replace(
'\"', '\\"').replace('\r', '\\r')
def convert_aggr_mode(aggr_mode: str) -> str:
def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
"""Returns the aggr_mode_class enum value associated with the JSON string."""
if not aggr_mode:
return None
@ -67,7 +183,7 @@ class JsonEvent:
}
return aggr_mode_to_enum[aggr_mode]
def lookup_msr(num: str) -> str:
def lookup_msr(num: str) -> Optional[str]:
"""Converts the msr number, or first in a list to the appropriate event field."""
if not num:
return None
@ -79,7 +195,7 @@ class JsonEvent:
}
return msrmap[int(num.split(',', 1)[0], 0)]
def real_event(name: str, event: str) -> str:
def real_event(name: str, event: str) -> Optional[str]:
"""Convert well known event names to an event string otherwise use the event argument."""
fixed = {
'inst_retired.any': 'event=0xc0,period=2000003',
@ -95,7 +211,7 @@ class JsonEvent:
return fixed[name.lower()]
return event
def unit_to_pmu(unit: str) -> str:
def unit_to_pmu(unit: str) -> Optional[str]:
"""Convert a JSON Unit to Linux PMU name."""
if not unit:
return None
@ -108,6 +224,7 @@ class JsonEvent:
'iMPH-U': 'uncore_arb',
'CPU-M-CF': 'cpum_cf',
'CPU-M-SF': 'cpum_sf',
'PAI-CRYPTO' : 'pai_crypto',
'UPI LL': 'uncore_upi',
'hisi_sicl,cpa': 'hisi_sicl,cpa',
'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
@ -128,6 +245,7 @@ class JsonEvent:
eventcode |= int(jd['ExtSel']) << 8
configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
self.name = jd['EventName'].lower() if 'EventName' in jd else None
self.topic = ''
self.compat = jd.get('Compat')
self.desc = fixdesc(jd.get('BriefDescription'))
self.long_desc = fixdesc(jd.get('PublicDescription'))
@ -154,7 +272,7 @@ class JsonEvent:
if self.metric_expr:
self.metric_expr = self.metric_expr.replace('\\', '\\\\')
arch_std = jd.get('ArchStdEvent')
if precise and self.desc and not '(Precise Event)' in self.desc:
if precise and self.desc and '(Precise Event)' not in self.desc:
extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
'event)')
event = f'config={llx(configcode)}' if configcode is not None else f'event={llx(eventcode)}'
@ -200,46 +318,38 @@ class JsonEvent:
s += f'\t{attr} = {value},\n'
return s + '}'
def to_c_string(self, topic_local: str) -> str:
"""Representation of the event as a C struct initializer."""
def attr_string(attr: str, value: str) -> str:
return '\t.%s = \"%s\",\n' % (attr, value)
def str_if_present(self, attr: str) -> str:
if not getattr(self, attr):
return ''
return attr_string(attr, getattr(self, attr))
s = '{\n'
for attr in ['name', 'event']:
s += str_if_present(self, attr)
if self.desc is not None:
s += attr_string('desc', self.desc)
else:
s += attr_string('desc', '(null)')
s += str_if_present(self, 'compat')
s += f'\t.topic = "{topic_local}",\n'
for attr in [
'long_desc', 'pmu', 'unit', 'perpkg', 'aggr_mode', 'metric_expr',
'metric_name', 'metric_group', 'deprecated', 'metric_constraint'
]:
s += str_if_present(self, attr)
s += '},\n'
def build_c_string(self) -> str:
s = ''
for attr in _json_event_attributes:
x = getattr(self, attr)
s += f'{x}\\000' if x else '\\000'
return s
def to_c_string(self) -> str:
"""Representation of the event as a C struct initializer."""
def read_json_events(path: str) -> Sequence[JsonEvent]:
s = self.build_c_string()
return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
"""Read json events from the specified file."""
return json.load(open(path), object_hook=lambda d: JsonEvent(d))
try:
result = json.load(open(path), object_hook=JsonEvent)
except BaseException as err:
print(f"Exception processing {path}")
raise
for event in result:
event.topic = topic
return result
def preprocess_arch_std_files(archpath: str) -> None:
"""Read in all architecture standard events."""
global _arch_std_events
for item in os.scandir(archpath):
if item.is_file() and item.name.endswith('.json'):
for event in read_json_events(item.path):
for event in read_json_events(item.path, topic=''):
if event.name:
_arch_std_events[event.name.lower()] = event
@ -249,39 +359,70 @@ def print_events_table_prefix(tblname: str) -> None:
global _close_table
if _close_table:
raise IOError('Printing table prefix but last table has no suffix')
_args.output_file.write(f'static const struct pmu_event {tblname}[] = {{\n')
_args.output_file.write(f'static const struct compact_pmu_event {tblname}[] = {{\n')
_close_table = True
def print_events_table_entries(item: os.DirEntry, topic: str) -> None:
"""Create contents of an events table."""
def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
"""Add contents of file to _pending_events table."""
if not _close_table:
raise IOError('Table entries missing prefix')
for event in read_json_events(item.path):
_args.output_file.write(event.to_c_string(topic))
for e in read_json_events(item.path, topic):
_pending_events.append(e)
def print_events_table_suffix() -> None:
"""Optionally close events table."""
def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
def fix_none(s: Optional[str]) -> str:
if s is None:
return ''
return s
return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
fix_none(j.metric_name))
global _close_table
if _close_table:
_args.output_file.write("""{
\t.name = 0,
\t.event = 0,
\t.desc = 0,
},
};
""")
if not _close_table:
return
global _pending_events
for event in sorted(_pending_events, key=event_cmp_key):
_args.output_file.write(event.to_c_string())
_pending_events = []
_args.output_file.write('};\n\n')
_close_table = False
def get_topic(topic: str) -> str:
  """Map a JSON file name to the topic string used for its events.

  Names ending in 'metrics.json' all share the 'metrics' topic. Any other
  name is passed through removesuffix() with '.json' and then has each
  dash replaced by a space.
  """
  is_metrics_file = topic.endswith('metrics.json')
  return 'metrics' if is_metrics_file else removesuffix(topic, '.json').replace('-', ' ')
def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Add the C string of every event in one JSON file to _bcs.

  Args:
    parents: Directory names leading to item; only its length is used here.
    item: Directory entry being visited.
  """
  if item.is_dir():
    return

  # Entries directly in the base directory (no parents) and entries more
  # than four levels down are not considered.
  depth = len(parents)
  if not 1 <= depth <= 4:
    return

  # Anything that is not a regular file with a .json extension is skipped;
  # it could be a readme.txt for instance.
  is_json_file = item.is_file() and item.name.endswith('.json')
  if not is_json_file:
    return

  events = read_json_events(item.path, get_topic(item.name))
  for json_event in events:
    _bcs.add(json_event.build_c_string())
def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
"""Process a JSON file during the main walk."""
global _sys_event_tables
def get_topic(topic: str) -> str:
return removesuffix(topic, '.json').replace('-', ' ')
def is_leaf_dir(path: str) -> bool:
for item in os.scandir(path):
if item.is_dir():
@ -308,59 +449,205 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
if not item.is_file() or not item.name.endswith('.json'):
return
print_events_table_entries(item, get_topic(item.name))
add_events_table_entries(item, get_topic(item.name))
def print_mapping_table() -> None:
def print_mapping_table(archs: Sequence[str]) -> None:
"""Read the mapfile and generate the struct from cpuid string to event table."""
with open(f'{_args.starting_dir}/{_args.arch}/mapfile.csv') as csvfile:
table = csv.reader(csvfile)
_args.output_file.write(
'const struct pmu_events_map pmu_events_map[] = {\n')
first = True
for row in table:
# Skip the first row or any row beginning with #.
if not first and len(row) > 0 and not row[0].startswith('#'):
tblname = file_name_to_table_name([], row[2].replace('/', '_'))
_args.output_file.write("""{
\t.cpuid = \"%s\",
\t.version = \"%s\",
\t.type = \"%s\",
\t.table = %s
_args.output_file.write("""
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
const struct compact_pmu_event *entries;
size_t length;
};
/*
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
* The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
* must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
*
* The cpuid can contain any character other than the comma.
*/
struct pmu_events_map {
const char *arch;
const char *cpuid;
struct pmu_events_table table;
};
/*
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
const struct pmu_events_map pmu_events_map[] = {
""")
for arch in archs:
if arch == 'test':
_args.output_file.write("""{
\t.arch = "testarch",
\t.cpuid = "testcpu",
\t.table = {
\t.entries = pme_test_soc_cpu,
\t.length = ARRAY_SIZE(pme_test_soc_cpu),
\t}
},
""" % (row[0].replace('\\', '\\\\'), row[1], row[3], tblname))
first = False
""")
else:
with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
table = csv.reader(csvfile)
first = True
for row in table:
# Skip the first row or any row beginning with #.
if not first and len(row) > 0 and not row[0].startswith('#'):
tblname = file_name_to_table_name([], row[2].replace('/', '_'))
cpuid = row[0].replace('\\', '\\\\')
_args.output_file.write(f"""{{
\t.arch = "{arch}",
\t.cpuid = "{cpuid}",
\t.table = {{
\t\t.entries = {tblname},
\t\t.length = ARRAY_SIZE({tblname})
\t}}
}},
""")
first = False
_args.output_file.write("""{
\t.cpuid = "testcpu",
\t.version = "v1",
\t.type = "core",
\t.table = pme_test_soc_cpu,
},
{
\t.arch = 0,
\t.cpuid = 0,
\t.version = 0,
\t.type = 0,
\t.table = 0,
},
\t.table = { 0, 0 },
}
};
""")
def print_system_mapping_table() -> None:
"""C struct mapping table array for tables from /sys directories."""
_args.output_file.write(
'\nconst struct pmu_sys_events pmu_sys_event_tables[] = {\n')
_args.output_file.write("""
struct pmu_sys_events {
\tconst char *name;
\tstruct pmu_events_table table;
};
static const struct pmu_sys_events pmu_sys_event_tables[] = {
""")
for tblname in _sys_event_tables:
_args.output_file.write(f"""\t{{
\t\t.table = {tblname},
\t\t.table = {{
\t\t\t.entries = {tblname},
\t\t\t.length = ARRAY_SIZE({tblname})
\t\t}},
\t\t.name = \"{tblname}\",
\t}},
""")
_args.output_file.write("""\t{
\t\t.table = 0
\t\t.table = { 0, 0 }
\t},
};
static void decompress(int offset, struct pmu_event *pe)
{
\tconst char *p = &big_c_string[offset];
""")
for attr in _json_event_attributes:
_args.output_file.write(f"""
\tpe->{attr} = (*p == '\\0' ? NULL : p);
""")
if attr == _json_event_attributes[-1]:
continue
_args.output_file.write('\twhile (*p++);')
_args.output_file.write("""}
int pmu_events_table_for_each_event(const struct pmu_events_table *table,
pmu_event_iter_fn fn,
void *data)
{
for (size_t i = 0; i < table->length; i++) {
struct pmu_event pe;
int ret;
decompress(table->entries[i].offset, &pe);
ret = fn(&pe, table, data);
if (ret)
return ret;
}
return 0;
}
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu)
{
const struct pmu_events_table *table = NULL;
char *cpuid = perf_pmu__getcpuid(pmu);
int i;
/* on some platforms which uses cpus map, cpuid can be NULL for
* PMUs other than CORE PMUs.
*/
if (!cpuid)
return NULL;
i = 0;
for (;;) {
const struct pmu_events_map *map = &pmu_events_map[i++];
if (!map->arch)
break;
if (!strcmp_cpuid_str(map->cpuid, cpuid)) {
table = &map->table;
break;
}
}
free(cpuid);
return table;
}
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0];
tables->arch;
tables++) {
if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
return &tables->table;
}
return NULL;
}
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
for (const struct pmu_events_map *tables = &pmu_events_map[0];
tables->arch;
tables++) {
int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
if (ret)
return ret;
}
return 0;
}
const struct pmu_events_table *find_sys_events_table(const char *name)
{
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
tables->name;
tables++) {
if (!strcmp(tables->name, name))
return &tables->table;
}
return NULL;
}
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
{
for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
tables->name;
tables++) {
int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
if (ret)
return ret;
}
return 0;
}
""")
@ -389,19 +676,48 @@ def main() -> None:
help='Root of tree containing architecture directories containing json files'
)
ap.add_argument(
'output_file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
_args = ap.parse_args()
_args.output_file.write("#include \"pmu-events/pmu-events.h\"\n")
for path in [_args.arch, 'test']:
arch_path = f'{_args.starting_dir}/{path}'
if not os.path.isdir(arch_path):
raise IOError(f'Missing architecture directory in \'{arch_path}\'')
_args.output_file.write("""
#include "pmu-events/pmu-events.h"
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>
struct compact_pmu_event {
int offset;
};
""")
archs = []
for item in os.scandir(_args.starting_dir):
if not item.is_dir():
continue
if item.name == _args.arch or _args.arch == 'all' or item.name == 'test':
archs.append(item.name)
if len(archs) < 2:
raise IOError(f'Missing architecture directory \'{_args.arch}\'')
archs.sort()
for arch in archs:
arch_path = f'{_args.starting_dir}/{arch}'
preprocess_arch_std_files(arch_path)
ftw(arch_path, [], preprocess_one_file)
_bcs.compute()
_args.output_file.write('static const char *const big_c_string =\n')
for s in _bcs.big_string:
_args.output_file.write(s)
_args.output_file.write(';\n\n')
for arch in archs:
arch_path = f'{_args.starting_dir}/{arch}'
ftw(arch_path, [], process_one_file)
print_events_table_suffix()
print_mapping_table()
print_mapping_table(archs)
print_system_mapping_table()

View File

@ -2,6 +2,8 @@
#ifndef PMU_EVENTS_H
#define PMU_EVENTS_H
struct perf_pmu;
enum aggr_mode_class {
PerChip = 1,
PerCore
@ -28,32 +30,20 @@ struct pmu_event {
const char *metric_constraint;
};
/*
*
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
* The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
* must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
*
* The cpuid can contain any character other than the comma.
*/
struct pmu_events_map {
const char *cpuid;
const char *version;
const char *type; /* core, uncore etc */
const struct pmu_event *table;
};
struct pmu_events_table;
struct pmu_sys_events {
const char *name;
const struct pmu_event *table;
};
typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe,
const struct pmu_events_table *table,
void *data);
/*
* Global table mapping each known CPU for the architecture to its
* table of PMU events.
*/
extern const struct pmu_events_map pmu_events_map[];
extern const struct pmu_sys_events pmu_sys_event_tables[];
int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
void *data);
const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu);
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid);
int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data);
const struct pmu_events_table *find_sys_events_table(const char *name);
int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data);
#endif

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
perf-y += builtin-test.o
perf-y += builtin-test-list.o
perf-y += parse-events.o
perf-y += dso-data.o
perf-y += attr.o

View File

@ -0,0 +1,207 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <subcmd/exec-cmd.h>
#include <subcmd/parse-options.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include "builtin.h"
#include "builtin-test-list.h"
#include "color.h"
#include "debug.h"
#include "hist.h"
#include "intlist.h"
#include "string2.h"
#include "symbol.h"
#include "tests.h"
#include "util/rlimit.h"
/*
* As this is a singleton built once for the run of the process, there is
* no value in trying to free it and just let it stay around until process
* exits when it's cleaned up.
*/
/* Number of scripts stored in files; the terminating entry is not counted */
static size_t files_num = 0;
/* Array of discovered scripts, ended by an entry whose fields are all NULL */
static struct script_file *files = NULL;
/* Largest strlen() of any description appended so far */
static int files_max_width = 0;
/*
 * Work out which directory holds the shell test scripts.
 *
 * The development tree locations are probed first: for each candidate
 * that exists, "<candidate>/shell" is written to @path and returned if
 * that subdirectory exists as well. When no development directory
 * qualifies, the installed location "<exec path>/tests/shell" is written
 * to @path without any existence check.
 *
 * @path: buffer receiving the resulting directory name
 * @size: size of @path in bytes
 *
 * Returns @path.
 */
static const char *shell_tests__dir(char *path, size_t size)
{
	const char *devel_dirs[] = { "./tools/perf/tests", "./tests", };
	char *exec_path;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
		struct stat st;

		if (!lstat(devel_dirs[i], &st)) {
			scnprintf(path, size, "%s/shell", devel_dirs[i]);
			/*
			 * Probe the shell/ subdirectory that was just built,
			 * not its parent a second time, so that a tree lacking
			 * shell/ falls through to the next candidate.
			 */
			if (!lstat(path, &st))
				return path;
		}
	}

	/* Then installed path. */
	exec_path = get_argv_exec_path();
	scnprintf(path, size, "%s/tests/shell", exec_path);
	free(exec_path);
	return path;
}
/*
 * Fetch the description line of the script @name located in @path.
 *
 * The first line of the file (expected to be the #!/bin/sh shebang) is
 * discarded and the following line is read into @description, limited
 * to @size bytes. The first character of that line is assumed to be the
 * comment marker and is stepped over; what follows is handed to strim()
 * and returned.
 *
 * Returns NULL when the file cannot be opened or nothing could be read
 * after the first line.
 */
static const char *shell_test__description(char *description, size_t size,
					   const char *path, const char *name)
{
	char script[PATH_MAX];
	FILE *stream;
	int c;

	path__join(script, sizeof(script), path, name);
	stream = fopen(script, "r");
	if (stream == NULL)
		return NULL;

	/* Consume everything up to and including the first newline */
	while ((c = fgetc(stream)) != EOF && c != '\n')
		;

	description = fgets(description, size, stream);
	fclose(stream);

	if (description == NULL)
		return NULL;

	/* Skip the leading comment character; the rest is the description */
	return strim(description + 1);
}
/*
 * Tell whether the full file path @path names a shell script: its last
 * '.'-introduced suffix must be exactly ".sh" and access() must grant
 * both read and execute permission on it.
 */
static bool is_shell_script(const char *path)
{
	const char *suffix = strrchr(path, '.');

	/* No dot at all, or a suffix other than .sh */
	if (suffix == NULL || strcmp(suffix, ".sh") != 0)
		return false;

	/* Must be readable and executable */
	return access(path, R_OK | X_OK) == 0;
}
/*
 * Tell whether the entry @name inside directory @path is a shell script
 * (for test purposes): the two are joined into a full path which is then
 * checked with is_shell_script().
 */
static bool is_test_script(const char *path, const char *name)
{
	char full_path[PATH_MAX];

	path__join(full_path, sizeof(full_path), path, name);
	return is_shell_script(full_path);
}
/*
 * strdup() wrapper that never returns NULL: when the copy cannot be
 * allocated an error is reported and the process is aborted.
 */
static char *strdup_check(const char *str)
{
	char *copy = strdup(str);

	if (copy != NULL)
		return copy;

	pr_err("Out of memory while duplicating test script string\n");
	abort();
}
static void append_script(const char *dir, const char *file, const char *desc)
{
struct script_file *files_tmp;
size_t files_num_tmp;
int width;
files_num_tmp = files_num + 1;
if (files_num_tmp >= SIZE_MAX) {
pr_err("Too many script files\n");
abort();
}
/* Realloc is good enough, though we could realloc by chunks, not that
* anyone will ever measure performance here */
files_tmp = realloc(files,
(files_num_tmp + 1) * sizeof(struct script_file));
if (files_tmp == NULL) {
pr_err("Out of memory while building test list\n");
abort();
}
/* Add file to end and NULL terminate the struct array */
files = files_tmp;
files_num = files_num_tmp;
files[files_num - 1].dir = strdup_check(dir);
files[files_num - 1].file = strdup_check(file);
files[files_num - 1].desc = strdup_check(desc);
files[files_num].dir = NULL;
files[files_num].file = NULL;
files[files_num].desc = NULL;
width = strlen(desc); /* Track max width of desc */
if (width > files_max_width)
files_max_width = width;
}
/*
 * Register every test script found in @path, descending into its
 * subdirectories.
 *
 * Entries are visited in alphasort order. Names starting with '.' are
 * ignored. An entry accepted by is_test_script() is appended only when a
 * description could be read from it; any other entry that is a directory
 * is scanned recursively. Nothing happens if @path cannot be scanned.
 */
static void append_scripts_in_dir(const char *path)
{
	struct dirent **namelist;
	char subdir[PATH_MAX];
	int count, idx;

	count = scandir(path, &namelist, NULL, alphasort);
	if (count == -1)
		return;

	for (idx = 0; idx < count; idx++) {
		struct dirent *entry = namelist[idx];

		if (entry == NULL)
			break;

		/* Skip hidden files */
		if (entry->d_name[0] == '.')
			continue;

		if (is_test_script(path, entry->d_name)) {
			char line[256];
			const char *desc;

			desc = shell_test__description(line, sizeof(line),
						       path, entry->d_name);
			/* Only a script with a desc line is a valid test */
			if (desc != NULL)
				append_script(path, entry->d_name, desc);
			continue;
		}

		if (!is_directory(path, entry))
			continue;

		/* Scan the subdir */
		path__join(subdir, sizeof(subdir), path, entry->d_name);
		append_scripts_in_dir(subdir);
	}

	/* Release what scandir() allocated */
	for (idx = 0; idx < count; idx++)
		zfree(&namelist[idx]);
	free(namelist);
}
/*
 * Return the list of shell test scripts, building it on first use.
 *
 * The list lives in the file-scope files array; once that is non-NULL it
 * is returned as is. Otherwise the shell test directory is walked to
 * populate it. The result is still NULL if no script was appended.
 */
const struct script_file *list_script_files(void)
{
	char dir_buf[PATH_MAX];

	/* Singleton: only walk the directory while there is no list yet */
	if (files == NULL)
		append_scripts_in_dir(shell_tests__dir(dir_buf, sizeof(dir_buf)));

	return files;
}
/*
 * Return the length of the longest script description, making sure the
 * script list has been built first.
 */
int list_script_max_width(void)
{
	/* Only called for its side effect of scanning all scripts */
	(void)list_script_files();

	return files_max_width;
}

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BUILTIN_TEST_LIST_H
#define BUILTIN_TEST_LIST_H

/*
 * One shell test script. The list returned by list_script_files() is
 * terminated by an entry whose dir is NULL.
 */
struct script_file {
	char *dir;	/* directory the script was found in */
	char *file;	/* file name of the script within dir */
	char *desc;	/* description read from the script */
};

/* List available script tests to run - singleton - never freed */
const struct script_file *list_script_files(void);
/* Get maximum width of description string */
int list_script_max_width(void);

#endif /* BUILTIN_TEST_LIST_H */

View File

@ -28,6 +28,8 @@
#include <subcmd/exec-cmd.h>
#include <linux/zalloc.h>
#include "builtin-test-list.h"
static bool dont_fork;
struct test_suite *__weak arch_tests[] = {
@ -274,91 +276,6 @@ static int test_and_print(struct test_suite *t, int subtest)
return err;
}
static const char *shell_test__description(char *description, size_t size,
const char *path, const char *name)
{
FILE *fp;
char filename[PATH_MAX];
int ch;
path__join(filename, sizeof(filename), path, name);
fp = fopen(filename, "r");
if (!fp)
return NULL;
/* Skip shebang */
do {
ch = fgetc(fp);
} while (ch != EOF && ch != '\n');
description = fgets(description, size, fp);
fclose(fp);
return description ? strim(description + 1) : NULL;
}
#define for_each_shell_test(entlist, nr, base, ent) \
for (int __i = 0; __i < nr && (ent = entlist[__i]); __i++) \
if (!is_directory(base, ent) && \
is_executable_file(base, ent) && \
ent->d_name[0] != '.')
static const char *shell_tests__dir(char *path, size_t size)
{
const char *devel_dirs[] = { "./tools/perf/tests", "./tests", };
char *exec_path;
unsigned int i;
for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
struct stat st;
if (!lstat(devel_dirs[i], &st)) {
scnprintf(path, size, "%s/shell", devel_dirs[i]);
if (!lstat(devel_dirs[i], &st))
return path;
}
}
/* Then installed path. */
exec_path = get_argv_exec_path();
scnprintf(path, size, "%s/tests/shell", exec_path);
free(exec_path);
return path;
}
static int shell_tests__max_desc_width(void)
{
struct dirent **entlist;
struct dirent *ent;
int n_dirs, e;
char path_dir[PATH_MAX];
const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
int width = 0;
if (path == NULL)
return -1;
n_dirs = scandir(path, &entlist, NULL, alphasort);
if (n_dirs == -1)
return -1;
for_each_shell_test(entlist, n_dirs, path, ent) {
char bf[256];
const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name);
if (desc) {
int len = strlen(desc);
if (width < len)
width = len;
}
}
for (e = 0; e < n_dirs; e++)
zfree(&entlist[e]);
free(entlist);
return width;
}
struct shell_test {
const char *dir;
const char *file;
@ -385,33 +302,17 @@ static int shell_test__run(struct test_suite *test, int subdir __maybe_unused)
static int run_shell_tests(int argc, const char *argv[], int i, int width,
struct intlist *skiplist)
{
struct dirent **entlist;
struct dirent *ent;
int n_dirs, e;
char path_dir[PATH_MAX];
struct shell_test st = {
.dir = shell_tests__dir(path_dir, sizeof(path_dir)),
};
struct shell_test st;
const struct script_file *files, *file;
if (st.dir == NULL)
return -1;
n_dirs = scandir(st.dir, &entlist, NULL, alphasort);
if (n_dirs == -1) {
pr_err("failed to open shell test directory: %s\n",
st.dir);
return -1;
}
for_each_shell_test(entlist, n_dirs, st.dir, ent) {
files = list_script_files();
if (!files)
return 0;
for (file = files; file->dir; file++) {
int curr = i++;
char desc[256];
struct test_case test_cases[] = {
{
.desc = shell_test__description(desc,
sizeof(desc),
st.dir,
ent->d_name),
.desc = file->desc,
.run_case = shell_test__run,
},
{ .name = NULL, }
@ -421,12 +322,13 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
.test_cases = test_cases,
.priv = &st,
};
st.dir = file->dir;
if (test_suite.desc == NULL ||
!perf_test__matches(test_suite.desc, curr, argc, argv))
continue;
st.file = ent->d_name;
st.file = file->file;
pr_info("%3d: %-*s:", i, width, test_suite.desc);
if (intlist__find(skiplist, i)) {
@ -436,10 +338,6 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
test_and_print(&test_suite, 0);
}
for (e = 0; e < n_dirs; e++)
zfree(&entlist[e]);
free(entlist);
return 0;
}
@ -448,7 +346,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
struct test_suite *t;
unsigned int j, k;
int i = 0;
int width = shell_tests__max_desc_width();
int width = list_script_max_width();
for_each_test(j, k, t) {
int len = strlen(test_description(t, -1));
@ -529,36 +427,22 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
static int perf_test__list_shell(int argc, const char **argv, int i)
{
struct dirent **entlist;
struct dirent *ent;
int n_dirs, e;
char path_dir[PATH_MAX];
const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
const struct script_file *files, *file;
if (path == NULL)
return -1;
n_dirs = scandir(path, &entlist, NULL, alphasort);
if (n_dirs == -1)
return -1;
for_each_shell_test(entlist, n_dirs, path, ent) {
files = list_script_files();
if (!files)
return 0;
for (file = files; file->dir; file++) {
int curr = i++;
char bf[256];
struct test_suite t = {
.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
.desc = file->desc
};
if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
pr_info("%3d: %s\n", i, t.desc);
}
for (e = 0; e < n_dirs; e++)
zfree(&entlist[e]);
free(entlist);
return 0;
}

View File

@ -638,7 +638,7 @@ static int do_test_code_reading(bool try_kcore)
str = do_determine_event(excl_kernel);
pr_debug("Parsing event '%s'\n", str);
ret = parse_events(evlist, str, NULL);
ret = parse_event(evlist, str);
if (ret < 0) {
pr_debug("parse_events failed\n");
goto out_put;

View File

@ -174,7 +174,7 @@ static int test_times(int (attach)(struct evlist *),
goto out_err;
}
err = parse_events(evlist, "cpu-clock:u", NULL);
err = parse_event(evlist, "cpu-clock:u");
if (err) {
pr_debug("failed to parse event cpu-clock:u\n");
goto out_err;

View File

@ -27,7 +27,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
__evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
err = parse_events(evlist, name, NULL);
err = parse_event(evlist, name);
if (err)
ret = err;
}
@ -75,7 +75,7 @@ static int __perf_evsel__name_array_test(const char *const names[], int nr_names
return -ENOMEM;
for (i = 0; i < nr_names; ++i) {
err = parse_events(evlist, names[i], NULL);
err = parse_event(evlist, names[i]);
if (err) {
pr_debug("failed to parse event '%s', err %d\n",
names[i], err);

View File

@ -180,33 +180,14 @@ static int expand_metric_events(void)
struct evlist *evlist;
struct rblist metric_events;
const char metric_str[] = "CPI";
struct pmu_event pme_test[] = {
{
.metric_expr = "instructions / cycles",
.metric_name = "IPC",
},
{
.metric_expr = "1 / IPC",
.metric_name = "CPI",
},
{
.metric_expr = NULL,
.metric_name = NULL,
},
};
const struct pmu_events_map ev_map = {
.cpuid = "test",
.version = "1",
.type = "core",
.table = pme_test,
};
const struct pmu_events_table *pme_test;
evlist = evlist__new();
TEST_ASSERT_VAL("failed to get evlist", evlist);
rblist__init(&metric_events);
ret = metricgroup__parse_groups_test(evlist, &ev_map, metric_str,
pme_test = find_core_events_table("testarch", "testcpu");
ret = metricgroup__parse_groups_test(evlist, pme_test, metric_str,
false, false, &metric_events);
if (ret < 0) {
pr_debug("failed to parse '%s' metric\n", metric_str);

View File

@ -706,7 +706,7 @@ static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subt
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock", NULL);
err = parse_event(evlist, "cpu-clock");
if (err)
goto out;
err = TEST_FAIL;

View File

@ -111,10 +111,10 @@ static int test__hists_filter(struct test_suite *test __maybe_unused, int subtes
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock", NULL);
err = parse_event(evlist, "cpu-clock");
if (err)
goto out;
err = parse_events(evlist, "task-clock", NULL);
err = parse_event(evlist, "task-clock");
if (err)
goto out;
err = TEST_FAIL;

View File

@ -276,10 +276,10 @@ static int test__hists_link(struct test_suite *test __maybe_unused, int subtest
if (evlist == NULL)
return -ENOMEM;
err = parse_events(evlist, "cpu-clock", NULL);
err = parse_event(evlist, "cpu-clock");
if (err)
goto out;
err = parse_events(evlist, "task-clock", NULL);
err = parse_event(evlist, "task-clock");
if (err)
goto out;

View File

@ -593,7 +593,7 @@ static int test__hists_output(struct test_suite *test __maybe_unused, int subtes
TEST_ASSERT_VAL("No memory", evlist);
err = parse_events(evlist, "cpu-clock", NULL);
err = parse_event(evlist, "cpu-clock");
if (err)
goto out;
err = TEST_FAIL;

View File

@ -89,8 +89,8 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte
perf_evlist__set_maps(&evlist->core, cpus, threads);
CHECK__(parse_events(evlist, "dummy:u", NULL));
CHECK__(parse_events(evlist, "cycles:u", NULL));
CHECK__(parse_event(evlist, "dummy:u"));
CHECK__(parse_event(evlist, "cycles:u"));
evlist__config(evlist, &opts, NULL);

View File

@ -13,79 +13,6 @@
#include "stat.h"
#include "pmu.h"
static struct pmu_event pme_test[] = {
{
.metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
.metric_name = "IPC",
.metric_group = "group1",
},
{
.metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
"( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
.metric_name = "Frontend_Bound_SMT",
},
{
.metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
.metric_name = "dcache_miss_cpi",
},
{
.metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
.metric_name = "icache_miss_cycles",
},
{
.metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
.metric_name = "cache_miss_cycles",
.metric_group = "group1",
},
{
.metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
.metric_name = "DCache_L2_All_Hits",
},
{
.metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
"l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
.metric_name = "DCache_L2_All_Miss",
},
{
.metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss",
.metric_name = "DCache_L2_All",
},
{
.metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
.metric_name = "DCache_L2_Hits",
},
{
.metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
.metric_name = "DCache_L2_Misses",
},
{
.metric_expr = "ipc + m2",
.metric_name = "M1",
},
{
.metric_expr = "ipc + m1",
.metric_name = "M2",
},
{
.metric_expr = "1/m3",
.metric_name = "M3",
},
{
.metric_expr = "64 * l1d.replacement / 1000000000 / duration_time",
.metric_name = "L1D_Cache_Fill_BW",
},
{
.name = NULL,
}
};
static const struct pmu_events_map map = {
.cpuid = "test",
.version = "1",
.type = "core",
.table = pme_test,
};
struct value {
const char *event;
u64 val;
@ -145,6 +72,7 @@ static int __compute_metric(const char *name, struct value *vals,
struct rblist metric_events = {
.nr_entries = 0,
};
const struct pmu_events_table *pme_test;
struct perf_cpu_map *cpus;
struct runtime_stat st;
struct evlist *evlist;
@ -168,7 +96,8 @@ static int __compute_metric(const char *name, struct value *vals,
runtime_stat__init(&st);
/* Parse the metric into metric_events list. */
err = metricgroup__parse_groups_test(evlist, &map, name,
pme_test = find_core_events_table("testarch", "testcpu");
err = metricgroup__parse_groups_test(evlist, pme_test, name,
false, false,
&metric_events);
if (err)

View File

@ -62,7 +62,7 @@ static int test__tsc_is_supported(struct test_suite *test __maybe_unused,
* This function implements a test that checks that the conversion of perf time
* to and from TSC is consistent with the order of events. If the test passes
* %0 is returned, otherwise %-1 is returned. If TSC conversion is not
* supported then then the test passes but " (not supported)" is printed.
* supported then the test passes but " (not supported)" is printed.
*/
static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
@ -100,7 +100,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
perf_evlist__set_maps(&evlist->core, cpus, threads);
CHECK__(parse_events(evlist, "cycles:u", NULL));
CHECK__(parse_event(evlist, "cycles:u"));
evlist__config(evlist, &opts, NULL);

View File

@ -9,10 +9,12 @@
#include <linux/zalloc.h>
#include "debug.h"
#include "../pmu-events/pmu-events.h"
#include <perf/evlist.h>
#include "util/evlist.h"
#include "util/expr.h"
#include "util/parse-events.h"
#include "metricgroup.h"
#include "stat.h"
struct perf_pmu_test_event {
/* used for matching against events from generated pmu-events.c */
@ -272,32 +274,6 @@ static bool is_same(const char *reference, const char *test)
return !strcmp(reference, test);
}
static const struct pmu_events_map *__test_pmu_get_events_map(void)
{
const struct pmu_events_map *map;
for (map = &pmu_events_map[0]; map->cpuid; map++) {
if (!strcmp(map->cpuid, "testcpu"))
return map;
}
pr_err("could not find test events map\n");
return NULL;
}
static const struct pmu_event *__test_pmu_get_sys_events_table(void)
{
const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
for ( ; tables->name; tables++) {
if (!strcmp("pme_test_soc_sys", tables->name))
return tables->table;
}
return NULL;
}
static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event *e2)
{
if (!is_same(e1->name, e2->name)) {
@ -447,85 +423,104 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias,
return 0;
}
/*
 * Per-event callback handed to pmu_events_table_for_each_event() for the core
 * events table. The event is looked up by name among the expected
 * uncore_events (when the event names a PMU) or core_events entries; every
 * name match bumps the counter passed through @data and is checked with
 * compare_pmu_events().
 *
 * Returns 0 on success or for an entry without a name, -1 when a comparison
 * fails or when no expected event carries that name.
 */
static int test__pmu_event_table_core_callback(const struct pmu_event *pe,
					       const struct pmu_events_table *table __maybe_unused,
					       void *data)
{
	struct perf_pmu_test_event const **cursor;
	int *nr_matched = data;
	bool matched = false;

	if (!pe->name)
		return 0;

	/* Events carrying a PMU name are checked against the uncore expectations. */
	cursor = pe->pmu ? &uncore_events[0] : &core_events[0];

	while (*cursor) {
		struct pmu_event const *expected = &(*cursor)->event;

		cursor++;
		if (strcmp(pe->name, expected->name) != 0)
			continue;

		matched = true;
		++*nr_matched;
		if (compare_pmu_events(pe, expected) != 0)
			return -1;

		pr_debug("testing event table %s: pass\n", pe->name);
	}

	if (matched)
		return 0;

	pr_err("testing event table: could not find event %s\n", pe->name);
	return -1;
}
/*
 * Per-event callback handed to pmu_events_table_for_each_event() for the
 * system events table. Every entry of sys_events whose name equals the
 * event's name bumps the counter passed through @data and is checked with
 * compare_pmu_events().
 *
 * Returns TEST_OK when at least one expected event matched and all
 * comparisons passed, TEST_FAIL otherwise.
 */
static int test__pmu_event_table_sys_callback(const struct pmu_event *pe,
					      const struct pmu_events_table *table __maybe_unused,
					      void *data)
{
	int *nr_matched = data;
	bool matched = false;

	/* sys_events is terminated by a NULL sentinel entry. */
	for (size_t i = 0; sys_events[i] != NULL; i++) {
		struct pmu_event const *expected = &sys_events[i]->event;

		if (strcmp(pe->name, expected->name) != 0)
			continue;

		matched = true;
		++*nr_matched;
		if (compare_pmu_events(pe, expected) != 0)
			return TEST_FAIL;

		pr_debug("testing sys event table %s: pass\n", pe->name);
	}

	if (matched)
		return TEST_OK;

	pr_debug("testing sys event table: could not find event %s\n", pe->name);
	return TEST_FAIL;
}
/* Verify generated events from pmu-events.c are as expected */
static int test__pmu_event_table(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table();
const struct pmu_events_map *map = __test_pmu_get_events_map();
const struct pmu_event *table;
int map_events = 0, expected_events;
const struct pmu_events_table *sys_event_table = find_sys_events_table("pme_test_soc_sys");
const struct pmu_events_table *table = find_core_events_table("testarch", "testcpu");
int map_events = 0, expected_events, err;
/* ignore 3x sentinels */
expected_events = ARRAY_SIZE(core_events) +
ARRAY_SIZE(uncore_events) +
ARRAY_SIZE(sys_events) - 3;
if (!map || !sys_event_tables)
if (!table || !sys_event_table)
return -1;
for (table = map->table; table->name; table++) {
struct perf_pmu_test_event const **test_event_table;
bool found = false;
err = pmu_events_table_for_each_event(table, test__pmu_event_table_core_callback,
&map_events);
if (err)
return err;
if (table->pmu)
test_event_table = &uncore_events[0];
else
test_event_table = &core_events[0];
for (; *test_event_table; test_event_table++) {
struct perf_pmu_test_event const *test_event = *test_event_table;
struct pmu_event const *event = &test_event->event;
if (strcmp(table->name, event->name))
continue;
found = true;
map_events++;
if (compare_pmu_events(table, event))
return -1;
pr_debug("testing event table %s: pass\n", table->name);
}
if (!found) {
pr_err("testing event table: could not find event %s\n",
table->name);
return -1;
}
}
for (table = sys_event_tables; table->name; table++) {
struct perf_pmu_test_event const **test_event_table;
bool found = false;
test_event_table = &sys_events[0];
for (; *test_event_table; test_event_table++) {
struct perf_pmu_test_event const *test_event = *test_event_table;
struct pmu_event const *event = &test_event->event;
if (strcmp(table->name, event->name))
continue;
found = true;
map_events++;
if (compare_pmu_events(table, event))
return -1;
pr_debug("testing sys event table %s: pass\n", table->name);
}
if (!found) {
pr_debug("testing event table: could not find event %s\n",
table->name);
return -1;
}
}
err = pmu_events_table_for_each_event(sys_event_table, test__pmu_event_table_sys_callback,
&map_events);
if (err)
return err;
if (map_events != expected_events) {
pr_err("testing event table: found %d, but expected %d\n",
map_events, expected_events);
return -1;
return TEST_FAIL;
}
return 0;
@ -549,10 +544,10 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count)
struct perf_pmu *pmu;
LIST_HEAD(aliases);
int res = 0;
const struct pmu_events_map *map = __test_pmu_get_events_map();
const struct pmu_events_table *table = find_core_events_table("testarch", "testcpu");
struct perf_pmu_alias *a, *tmp;
if (!map)
if (!table)
return -1;
test_event_table = &core_events[0];
@ -563,7 +558,7 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count)
pmu->name = pmu_name;
pmu_add_cpu_aliases_map(&aliases, pmu, map);
pmu_add_cpu_aliases_table(&aliases, pmu, table);
for (; *test_event_table; test_event_table++) {
struct perf_pmu_test_event const *test_event = *test_event_table;
@ -602,14 +597,14 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu)
struct perf_pmu *pmu = &test_pmu->pmu;
const char *pmu_name = pmu->name;
struct perf_pmu_alias *a, *tmp, *alias;
const struct pmu_events_map *map;
const struct pmu_events_table *events_table;
LIST_HEAD(aliases);
int res = 0;
map = __test_pmu_get_events_map();
if (!map)
events_table = find_core_events_table("testarch", "testcpu");
if (!events_table)
return -1;
pmu_add_cpu_aliases_map(&aliases, pmu, map);
pmu_add_cpu_aliases_table(&aliases, pmu, events_table);
pmu_add_sys_aliases(&aliases, pmu);
/* Count how many aliases we generated */
@ -828,27 +823,6 @@ static int check_parse_id(const char *id, struct parse_events_error *error,
return ret;
}
static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event *pe)
{
struct parse_events_error error;
int ret;
parse_events_error__init(&error);
ret = check_parse_id(id, &error, NULL);
if (ret && same_cpu) {
pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
pe->metric_name, id, pe->metric_expr);
pr_warning("Error string '%s' help '%s'\n", error.str,
error.help);
} else if (ret) {
pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n",
id, pe->metric_name, pe->metric_expr);
ret = 0;
}
parse_events_error__exit(&error);
return ret;
}
static int check_parse_fake(const char *id)
{
struct parse_events_error error;
@ -860,168 +834,116 @@ static int check_parse_fake(const char *id)
return ret;
}
static void expr_failure(const char *msg,
const struct pmu_events_map *map,
const struct pmu_event *pe)
{
pr_debug("%s for map %s %s %s\n",
msg, map->cpuid, map->version, map->type);
pr_debug("On metric %s\n", pe->metric_name);
pr_debug("On expression %s\n", pe->metric_expr);
}
struct metric {
struct list_head list;
struct metric_ref metric_ref;
};
static int resolve_metric_simple(struct expr_parse_ctx *pctx,
struct list_head *compound_list,
const struct pmu_events_map *map,
const char *metric_name)
static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_events_table *table,
void *data)
{
struct hashmap_entry *cur, *cur_tmp;
struct metric *metric, *tmp;
size_t bkt;
bool all;
int rc;
int *failures = data;
int k;
struct evlist *evlist;
struct perf_cpu_map *cpus;
struct runtime_stat st;
struct evsel *evsel;
struct rblist metric_events = {
.nr_entries = 0,
};
int err = 0;
do {
all = true;
hashmap__for_each_entry_safe(pctx->ids, cur, cur_tmp, bkt) {
struct metric_ref *ref;
const struct pmu_event *pe;
if (!pe->metric_expr)
return 0;
pe = metricgroup__find_metric(cur->key, map);
if (!pe)
continue;
pr_debug("Found metric '%s'\n", pe->metric_name);
(*failures)++;
if (!strcmp(metric_name, (char *)cur->key)) {
pr_warning("Recursion detected for metric %s\n", metric_name);
rc = -1;
goto out_err;
}
/*
* We need to prepare evlist for stat mode running on CPU 0
* because that's where all the stats are going to be created.
*/
evlist = evlist__new();
if (!evlist)
return -ENOMEM;
all = false;
cpus = perf_cpu_map__new("0");
if (!cpus) {
evlist__delete(evlist);
return -ENOMEM;
}
/* The metric key itself needs to go out.. */
expr__del_id(pctx, cur->key);
perf_evlist__set_maps(&evlist->core, cpus, NULL);
runtime_stat__init(&st);
metric = malloc(sizeof(*metric));
if (!metric) {
rc = -ENOMEM;
goto out_err;
}
ref = &metric->metric_ref;
ref->metric_name = pe->metric_name;
ref->metric_expr = pe->metric_expr;
list_add_tail(&metric->list, compound_list);
rc = expr__find_ids(pe->metric_expr, NULL, pctx);
if (rc)
goto out_err;
break; /* The hashmap has been modified, so restart */
err = metricgroup__parse_groups_test(evlist, table, pe->metric_name,
false, false,
&metric_events);
if (err) {
if (!strcmp(pe->metric_name, "M1") || !strcmp(pe->metric_name, "M2") ||
!strcmp(pe->metric_name, "M3")) {
(*failures)--;
pr_debug("Expected broken metric %s skipping\n", pe->metric_name);
err = 0;
}
} while (!all);
goto out_err;
}
return 0;
err = evlist__alloc_stats(evlist, false);
if (err)
goto out_err;
/*
* Add all ids with a made up value. The value may trigger divide by
* zero when subtracted and so try to make them unique.
*/
k = 1;
perf_stat__reset_shadow_stats();
evlist__for_each_entry(evlist, evsel) {
perf_stat__update_shadow_stats(evsel, k, 0, &st);
if (!strcmp(evsel->name, "duration_time"))
update_stats(&walltime_nsecs_stats, k);
k++;
}
evlist__for_each_entry(evlist, evsel) {
struct metric_event *me = metricgroup__lookup(&metric_events, evsel, false);
if (me != NULL) {
struct metric_expr *mexp;
list_for_each_entry (mexp, &me->head, nd) {
if (strcmp(mexp->metric_name, pe->metric_name))
continue;
pr_debug("Result %f\n", test_generic_metric(mexp, 0, &st));
err = 0;
(*failures)--;
goto out_err;
}
}
}
pr_debug("Didn't find parsed metric %s", pe->metric_name);
err = 1;
out_err:
list_for_each_entry_safe(metric, tmp, compound_list, list)
free(metric);
return rc;
if (err)
pr_debug("Broken metric %s\n", pe->metric_name);
/* ... cleanup. */
metricgroup__rblist_exit(&metric_events);
runtime_stat__exit(&st);
evlist__free_stats(evlist);
perf_cpu_map__put(cpus);
evlist__delete(evlist);
return err;
}
static int test__parsing(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
const struct pmu_events_map *cpus_map = pmu_events_map__find();
const struct pmu_events_map *map;
const struct pmu_event *pe;
int i, j, k;
int ret = 0;
struct expr_parse_ctx *ctx;
double result;
int failures = 0;
ctx = expr__ctx_new();
if (!ctx) {
pr_debug("expr__ctx_new failed");
return TEST_FAIL;
}
i = 0;
for (;;) {
map = &pmu_events_map[i++];
if (!map->table)
break;
j = 0;
for (;;) {
struct metric *metric, *tmp;
struct hashmap_entry *cur;
LIST_HEAD(compound_list);
size_t bkt;
pmu_for_each_core_event(test__parsing_callback, &failures);
pmu_for_each_sys_event(test__parsing_callback, &failures);
pe = &map->table[j++];
if (!pe->name && !pe->metric_group && !pe->metric_name)
break;
if (!pe->metric_expr)
continue;
expr__ctx_clear(ctx);
if (expr__find_ids(pe->metric_expr, NULL, ctx) < 0) {
expr_failure("Parse find ids failed", map, pe);
ret++;
continue;
}
if (resolve_metric_simple(ctx, &compound_list, map,
pe->metric_name)) {
expr_failure("Could not resolve metrics", map, pe);
ret++;
goto exit; /* Don't tolerate errors due to severity */
}
/*
* Add all ids with a made up value. The value may
* trigger divide by zero when subtracted and so try to
* make them unique.
*/
k = 1;
hashmap__for_each_entry(ctx->ids, cur, bkt)
expr__add_id_val(ctx, strdup(cur->key), k++);
hashmap__for_each_entry(ctx->ids, cur, bkt) {
if (check_parse_cpu(cur->key, map == cpus_map,
pe))
ret++;
}
list_for_each_entry_safe(metric, tmp, &compound_list, list) {
expr__add_ref(ctx, &metric->metric_ref);
free(metric);
}
if (expr__parse(&result, ctx, pe->metric_expr)) {
/*
* Parsing failed, make numbers go from large to
* small which can resolve divide by zero
* issues.
*/
k = 1024;
hashmap__for_each_entry(ctx->ids, cur, bkt)
expr__add_id_val(ctx, strdup(cur->key), k--);
if (expr__parse(&result, ctx, pe->metric_expr)) {
expr_failure("Parse failed", map, pe);
ret++;
}
}
}
}
expr__ctx_free(ctx);
/* TODO: fail when not ok */
exit:
return ret == 0 ? TEST_OK : TEST_SKIP;
return failures == 0 ? TEST_OK : TEST_FAIL;
}
struct test_metric {
@ -1093,6 +1015,16 @@ out:
return ret;
}
/*
 * Per-event callback for test__parsing_fake(): entries without a metric
 * expression are accepted as-is (0); otherwise the result of
 * metric_parse_fake() on the expression is returned.
 */
static int test__parsing_fake_callback(const struct pmu_event *pe,
				       const struct pmu_events_table *table __maybe_unused,
				       void *data __maybe_unused)
{
	return pe->metric_expr ? metric_parse_fake(pe->metric_expr) : 0;
}
/*
* Parse all the metrics for current architecture,
* or all defined cpus via the 'fake_pmu'
@ -1101,37 +1033,19 @@ out:
static int test__parsing_fake(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
const struct pmu_events_map *map;
const struct pmu_event *pe;
unsigned int i, j;
int err = 0;
for (i = 0; i < ARRAY_SIZE(metrics); i++) {
for (size_t i = 0; i < ARRAY_SIZE(metrics); i++) {
err = metric_parse_fake(metrics[i].str);
if (err)
return err;
}
i = 0;
for (;;) {
map = &pmu_events_map[i++];
if (!map->table)
break;
j = 0;
for (;;) {
pe = &map->table[j++];
if (!pe->name && !pe->metric_group && !pe->metric_name)
break;
if (!pe->metric_expr)
continue;
pr_debug("Found metric '%s' for '%s'\n", pe->metric_name, map->cpuid);
err = metric_parse_fake(pe->metric_expr);
if (err)
return err;
}
}
err = pmu_for_each_core_event(test__parsing_fake_callback, NULL);
if (err)
return err;
return 0;
return pmu_for_each_sys_event(test__parsing_fake_callback, NULL);
}
static struct test_case pmu_events_tests[] = {

View File

@ -0,0 +1,96 @@
#!/usr/bin/python
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Basic sanity check of perf JSON output as specified in the man page.
import argparse
import sys
import json
ap = argparse.ArgumentParser()
ap.add_argument('--no-args', action='store_true')
ap.add_argument('--interval', action='store_true')
ap.add_argument('--system-wide-no-aggr', action='store_true')
ap.add_argument('--system-wide', action='store_true')
ap.add_argument('--event', action='store_true')
ap.add_argument('--per-core', action='store_true')
ap.add_argument('--per-thread', action='store_true')
ap.add_argument('--per-die', action='store_true')
ap.add_argument('--per-node', action='store_true')
ap.add_argument('--per-socket', action='store_true')
args = ap.parse_args()
Lines = sys.stdin.readlines()
def isfloat(num):
    """Return True if ``num`` is accepted by float(), False otherwise."""
    try:
        float(num)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number.
        # TypeError: a value float() cannot take at all (None, list, dict).
        return False
    return True
def isint(num):
    """Return True if ``num`` is accepted by int(), False otherwise."""
    try:
        int(num)
    except (TypeError, ValueError):
        # ValueError: a string that is not an integer literal.
        # TypeError: a value int() cannot take at all (None, list, dict).
        return False
    return True
def is_counter_value(num):
    """Return True for a float-like value or one of the two placeholder strings."""
    if isfloat(num):
        return True
    return num in ('<not counted>', '<not supported>')
def check_json_output(expected_items):
    """Validate the output lines held in the module-level ``Lines``.

    Two passes are made:

    1. Unless ``expected_items`` is -1, every line that does not contain
       'failed' must contain exactly ``expected_items`` commas. Lines that
       contain 'metric-value' and have between 1 and 3 commas are exempt.
    2. All lines are joined into one JSON array and each key of each item is
       checked against a per-key validator.

    Raises RuntimeError on a wrong field count, an unknown key or a value
    rejected by its validator.
    """
    if expected_items != -1:
        for line in Lines:
            if 'failed' in line:
                continue
            count = line.count(',')
            if count == expected_items:
                continue
            if 1 <= count <= 3 and 'metric-value' in line:
                # Events that generate >1 metric may have isolated metric
                # values and possibly other prefixes like interval, core and
                # aggregate-number.
                continue
            raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
                               f' in \'{line}\'')

    def accept_any(_value):
        # Keys whose value format is not checked.
        return True

    checks = {
        'aggregate-number': isfloat,
        'core': accept_any,
        'counter-value': is_counter_value,
        'cgroup': accept_any,
        'cpu': isint,
        'die': accept_any,
        'event': accept_any,
        'event-runtime': isfloat,
        'interval': isfloat,
        'metric-unit': accept_any,
        'metric-value': isfloat,
        'node': accept_any,
        'pcnt-running': isfloat,
        'socket': accept_any,
        'thread': accept_any,
        'unit': accept_any,
    }
    # Each input line is one JSON object; wrap them into a single array.
    json_text = '[\n' + ','.join(Lines) + '\n]'
    for item in json.loads(json_text):
        for key, value in item.items():
            if key not in checks:
                raise RuntimeError(f'Unexpected key: key={key} value={value}')
            if not checks[key](value):
                raise RuntimeError(f'Check failed for: key={key} value={value}')
# Pick the expected comma count from the selected mode, run the checks and,
# on any failure, dump the offending input before re-raising.
try:
    if any((args.no_args, args.system_wide, args.event)):
        expected_items = 6
    elif any((args.interval, args.per_thread, args.system_wide_no_aggr)):
        expected_items = 7
    elif any((args.per_core, args.per_socket, args.per_node, args.per_die)):
        expected_items = 8
    else:
        # If no option is specified, don't check the number of items.
        expected_items = -1
    check_json_output(expected_items)
except:
    print('Test failed for input:\n' + '\n'.join(Lines))
    raise

View File

@ -19,20 +19,26 @@ trap_cleanup() {
}
trap trap_cleanup exit term int
test_offcpu() {
echo "Basic off-cpu test"
test_offcpu_priv() {
echo "Checking off-cpu privilege"
if [ `id -u` != 0 ]
then
echo "Basic off-cpu test [Skipped permission]"
echo "off-cpu test [Skipped permission]"
err=2
return
fi
if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL
if perf record --off-cpu -o /dev/null --quiet true 2>&1 | grep BUILD_BPF_SKEL
then
echo "Basic off-cpu test [Skipped missing BPF support]"
echo "off-cpu test [Skipped missing BPF support]"
err=2
return
fi
}
test_offcpu_basic() {
echo "Basic off-cpu test"
if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null
then
echo "Basic off-cpu test [Failed record]"
@ -41,7 +47,7 @@ test_offcpu() {
fi
if ! perf evlist -i ${perfdata} | grep -q "offcpu-time"
then
echo "Basic off-cpu test [Failed record]"
echo "Basic off-cpu test [Failed no event]"
err=1
return
fi
@ -54,7 +60,44 @@ test_offcpu() {
echo "Basic off-cpu test [Success]"
}
test_offcpu
# Check that off-cpu time is recorded for the processes spawned by the
# workload. Sets err=1 and returns early on the first failing step.
test_offcpu_child() {
	echo "Child task off-cpu test"

	# perf bench sched messaging creates 400 processes
	# stdout and stderr of the recording are both discarded.
	if ! perf record --off-cpu -e dummy -o ${perfdata} -- \
		perf bench sched messaging -g 10 > /dev/null 2>&1
	then
		echo "Child task off-cpu test [Failed record]"
		err=1
		return
	fi
	if ! perf evlist -i ${perfdata} | grep -q "offcpu-time"
	then
		echo "Child task off-cpu test [Failed no event]"
		err=1
		return
	fi
	# each process waits for read and write, so it should be more than 800 events
	if ! perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \
		awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }'
	then
		echo "Child task off-cpu test [Failed invalid output]"
		err=1
		return
	fi
	echo "Child task off-cpu test [Success]"
}
test_offcpu_priv
if [ $err = 0 ]; then
test_offcpu_basic
fi
if [ $err = 0 ]; then
test_offcpu_child
fi
cleanup
exit $err

View File

@ -0,0 +1,147 @@
#!/bin/bash
# perf stat JSON output linter
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Checks various perf stat JSON output commands for the
# correct number of fields.
set -e
pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py
if [ "x$PYTHON" == "x" ]
then
if which python3 > /dev/null
then
PYTHON=python3
elif which python > /dev/null
then
PYTHON=python
else
echo Skipping test, python not detected please set environment variable PYTHON.
exit 2
fi
fi
# Succeeds only when the caller is not root and the value in
# /proc/sys/kernel/perf_event_paranoid is greater than $1.
function ParanoidAndNotRoot()
{
	# Root: propagate the failed test's status as the result.
	[ $(id -u) != 0 ] || return
	[ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
}
check_no_args()
{
echo -n "Checking json output: no args "
perf stat -j true 2>&1 | $PYTHON $pythonchecker --no-args
echo "[Success]"
}
check_system_wide()
{
echo -n "Checking json output: system wide "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j -a true 2>&1 | $PYTHON $pythonchecker --system-wide
echo "[Success]"
}
# Lint the JSON output of a system-wide run without aggregation
# (perf stat -A -a --no-merge). Skipped when ParanoidAndNotRoot 0 succeeds.
check_system_wide_no_aggr()
{
	# Print the label once, before the skip check, so that both the
	# "[Skip]" and the "[Success]" result are attributed to this test.
	echo -n "Checking json output: system wide no aggregation "
	if ParanoidAndNotRoot 0
	then
		echo "[Skip] paranoia and not root"
		return
	fi
	perf stat -j -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
	echo "[Success]"
}
check_interval()
{
echo -n "Checking json output: interval "
perf stat -j -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
echo "[Success]"
}
check_event()
{
echo -n "Checking json output: event "
perf stat -j -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
echo "[Success]"
}
check_per_core()
{
echo -n "Checking json output: per core "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
echo "[Success]"
}
check_per_thread()
{
echo -n "Checking json output: per thread "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
echo "[Success]"
}
check_per_die()
{
echo -n "Checking json output: per die "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
echo "[Success]"
}
check_per_node()
{
echo -n "Checking json output: per node "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
echo "[Success]"
}
check_per_socket()
{
echo -n "Checking json output: per socket "
if ParanoidAndNotRoot 0
then
echo "[Skip] paranoia and not root"
return
fi
perf stat -j --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
echo "[Success]"
}
check_no_args
check_system_wide
check_system_wide_no_aggr
check_interval
check_event
check_per_core
check_per_thread
check_per_die
check_per_node
check_per_socket
exit 0

View File

@ -324,6 +324,7 @@ out_free_nodes:
static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
const char *sched_switch = "sched:sched_switch";
const char *cycles = "cycles:u";
struct switch_tracking switch_tracking = { .tids = NULL, };
struct record_opts opts = {
.mmap_pages = UINT_MAX,
@ -363,7 +364,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
perf_evlist__set_maps(&evlist->core, cpus, threads);
/* First event */
err = parse_events(evlist, "cpu-clock:u", NULL);
err = parse_event(evlist, "cpu-clock:u");
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;
@ -372,12 +373,19 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
cpu_clocks_evsel = evlist__last(evlist);
/* Second event */
if (perf_pmu__has_hybrid())
err = parse_events(evlist, "cpu_core/cycles/u", NULL);
else
err = parse_events(evlist, "cycles:u", NULL);
if (perf_pmu__has_hybrid()) {
cycles = "cpu_core/cycles/u";
err = parse_event(evlist, cycles);
if (err) {
cycles = "cpu_atom/cycles/u";
pr_debug("Trying %s\n", cycles);
err = parse_event(evlist, cycles);
}
} else {
err = parse_event(evlist, cycles);
}
if (err) {
pr_debug("Failed to parse event cycles:u\n");
pr_debug("Failed to parse event %s\n", cycles);
goto out_err;
}
@ -390,7 +398,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
goto out;
}
err = parse_events(evlist, sched_switch, NULL);
err = parse_event(evlist, sched_switch);
if (err) {
pr_debug("Failed to parse event %s\n", sched_switch);
goto out_err;
@ -420,7 +428,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
err = parse_events(evlist, "dummy:u", NULL);
err = parse_event(evlist, "dummy:u");
if (err) {
pr_debug("Failed to parse event dummy:u\n");
goto out_err;

View File

@ -289,6 +289,7 @@ CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_parse-events.o += -Wno-redundant-decls
CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
$(call rule_mkdir)

View File

@ -220,6 +220,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
break;
case ARM_SPE_DATA_SOURCE:
decoder->record.source = payload;
break;
case ARM_SPE_BAD:
break;

View File

@ -29,6 +29,17 @@ enum arm_spe_op_type {
ARM_SPE_ST = 1 << 1,
};
enum arm_spe_neoverse_data_source {
ARM_SPE_NV_L1D = 0x0,
ARM_SPE_NV_L2 = 0x8,
ARM_SPE_NV_PEER_CORE = 0x9,
ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
ARM_SPE_NV_SYS_CACHE = 0xb,
ARM_SPE_NV_PEER_CLUSTER = 0xc,
ARM_SPE_NV_REMOTE = 0xd,
ARM_SPE_NV_DRAM = 0xe,
};
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
@ -40,6 +51,7 @@ struct arm_spe_record {
u64 virt_addr;
u64 phys_addr;
u64 context_id;
u16 source;
};
struct arm_spe_insn;

View File

@ -34,6 +34,7 @@
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
#include "../../arch/arm64/include/asm/cputype.h"
#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
@ -45,6 +46,7 @@ struct arm_spe {
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
u64 midr;
struct perf_tsc_conversion tc;
@ -387,9 +389,116 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
static const struct midr_range neoverse_spe[] = {
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
{},
};
static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
/*
* Even though four levels of cache hierarchy are possible, no known
* production Neoverse systems currently include more than three levels
* so for the time being we assume three exist. If a production system
* is built with four then this function would have to be changed to
* detect the number of levels for reporting.
*/
/*
* We have no data on the hit level or data source for stores in the
* Neoverse SPE records.
*/
if (record->op & ARM_SPE_ST) {
data_src->mem_lvl = PERF_MEM_LVL_NA;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
data_src->mem_snoop = PERF_MEM_SNOOP_NA;
return;
}
switch (record->source) {
case ARM_SPE_NV_L1D:
data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
case ARM_SPE_NV_L2:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
case ARM_SPE_NV_PEER_CORE:
data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
/*
* We don't know if this is L1, L2 but we do know it was a cache-2-cache
* transfer, so set SNOOPX_PEER
*/
case ARM_SPE_NV_LOCAL_CLUSTER:
case ARM_SPE_NV_PEER_CLUSTER:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
/*
* System cache is assumed to be L3
*/
case ARM_SPE_NV_SYS_CACHE:
data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
break;
/*
* We don't know what level it hit in, except it came from the other
* socket
*/
case ARM_SPE_NV_REMOTE:
data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
case ARM_SPE_NV_DRAM:
data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
break;
default:
break;
}
}
/*
 * Derive data_src->mem_lvl from the event-type bits of an SPE record.
 *
 * LLC access/miss bits take precedence over L1D bits: the level is set to L3
 * or L1 respectively, combined with MISS when the matching miss bit is set
 * and HIT otherwise. When neither group of bits is present mem_lvl is left as
 * passed in. A remote-access bit additionally ORs in PERF_MEM_LVL_REM_CCE1.
 */
static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
					       union perf_mem_data_src *data_src)
{
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L3 |
				    ((record->type & ARM_SPE_LLC_MISS) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT);
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src->mem_lvl = PERF_MEM_LVL_L1 |
				    ((record->type & ARM_SPE_L1D_MISS) ?
				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT);
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
}
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { 0 };
bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
@ -398,24 +507,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
else
return 0;
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L3;
if (record->type & ARM_SPE_LLC_MISS)
data_src.mem_lvl |= PERF_MEM_LVL_MISS;
else
data_src.mem_lvl |= PERF_MEM_LVL_HIT;
} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L1;
if (record->type & ARM_SPE_L1D_MISS)
data_src.mem_lvl |= PERF_MEM_LVL_MISS;
else
data_src.mem_lvl |= PERF_MEM_LVL_HIT;
}
if (record->type & ARM_SPE_REMOTE_ACCESS)
data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
if (is_neoverse)
arm_spe__synth_data_source_neoverse(record, &data_src);
else
arm_spe__synth_data_source_generic(record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK;
@ -436,7 +531,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
u64 data_src;
int err;
data_src = arm_spe__synth_data_source(record);
data_src = arm_spe__synth_data_source(record, spe->midr);
if (spe->sample_flc) {
if (record->type & ARM_SPE_L1D_MISS) {
@ -1178,6 +1273,8 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
struct perf_record_time_conv *tc = &session->time_conv;
const char *cpuid = perf_env__cpuid(session->evlist->env);
u64 midr = strtol(cpuid, NULL, 16);
struct arm_spe *spe;
int err;
@ -1197,6 +1294,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->machine = &session->machines.host; /* No kvm support */
spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
spe->midr = midr;
spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

View File

@ -1879,7 +1879,7 @@ struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
return ERR_PTR(-ENOMEM);
err = parse_events(evlist, event_definition, NULL);
err = parse_event(evlist, event_definition);
free(event_definition);
if (err) {

View File

@ -11,11 +11,13 @@
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/cgroup.h"
#include "util/strlist.h"
#include <bpf/bpf.h>
#include "bpf_skel/off_cpu.skel.h"
#define MAX_STACKS 32
#define MAX_PROC 4096
/* we don't need actual timestamp, just want to put the samples at last */
#define OFF_CPU_TIMESTAMP (~0ull << 32)
@ -78,6 +80,7 @@ static void off_cpu_start(void *arg)
u8 val = 1;
skel->bss->has_task = 1;
skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
pid = perf_thread_map__pid(evlist->core.threads, 0);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
@ -124,6 +127,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
{
int err, fd, i;
int ncpus = 1, ntasks = 1, ncgrps = 1;
struct strlist *pid_slist = NULL;
struct str_node *pos;
if (off_cpu_config(evlist) < 0) {
pr_err("Failed to config off-cpu BPF event\n");
@ -142,9 +147,34 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
}
if (target__has_task(target)) {
if (target->pid) {
pid_slist = strlist__new(target->pid, NULL);
if (!pid_slist) {
pr_err("Failed to create a strlist for pid\n");
return -1;
}
ntasks = 0;
strlist__for_each_entry(pos, pid_slist) {
char *end_ptr;
int pid = strtol(pos->s, &end_ptr, 10);
if (pid == INT_MIN || pid == INT_MAX ||
(*end_ptr != '\0' && *end_ptr != ','))
continue;
ntasks++;
}
if (ntasks < MAX_PROC)
ntasks = MAX_PROC;
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
} else if (target__has_task(target)) {
ntasks = perf_thread_map__nr(evlist->core.threads);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
} else if (target__none(target)) {
bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC);
}
if (evlist__first(evlist)->cgrp) {
@ -184,7 +214,26 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
}
}
if (target__has_task(target)) {
if (target->pid) {
u8 val = 1;
skel->bss->has_task = 1;
skel->bss->uses_tgid = 1;
fd = bpf_map__fd(skel->maps.task_filter);
strlist__for_each_entry(pos, pid_slist) {
char *end_ptr;
u32 tgid;
int pid = strtol(pos->s, &end_ptr, 10);
if (pid == INT_MIN || pid == INT_MAX ||
(*end_ptr != '\0' && *end_ptr != ','))
continue;
tgid = pid;
bpf_map_update_elem(fd, &tgid, &val, BPF_ANY);
}
} else if (target__has_task(target)) {
u32 pid;
u8 val = 1;

View File

@ -12,6 +12,9 @@
#define TASK_INTERRUPTIBLE 0x0001
#define TASK_UNINTERRUPTIBLE 0x0002
/* create a new thread */
#define CLONE_THREAD 0x10000
#define MAX_STACKS 32
#define MAX_ENTRIES 102400
@ -85,6 +88,7 @@ int enabled = 0;
int has_cpu = 0;
int has_task = 0;
int has_cgroup = 0;
int uses_tgid = 0;
const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
@ -144,7 +148,12 @@ static inline int can_record(struct task_struct *t, int state)
if (has_task) {
__u8 *ok;
__u32 pid = t->pid;
__u32 pid;
if (uses_tgid)
pid = t->tgid;
else
pid = t->pid;
ok = bpf_map_lookup_elem(&task_filter, &pid);
if (!ok)
@ -214,6 +223,33 @@ next:
return 0;
}
/*
 * task_newtask handler: when tgid-based filtering is enabled and the
 * current task's tgid is already in task_filter, add the new task's tgid
 * to task_filter as well. Clones carrying CLONE_THREAD are not added.
 *
 * ctx[0] is read as the new task and ctx[1] as its clone flags.
 * Always returns 0.
 */
SEC("tp_btf/task_newtask")
int on_newtask(u64 *ctx)
{
	struct task_struct *parent;
	struct task_struct *child;
	u64 flags;
	u32 tgid;
	u8 present = 1;

	/* Nothing to track unless the filter is keyed by tgid. */
	if (!uses_tgid)
		return 0;

	/* Only tasks created by an already-filtered process are of interest. */
	parent = (struct task_struct *)bpf_get_current_task();
	tgid = BPF_CORE_READ(parent, tgid);
	if (!bpf_map_lookup_elem(&task_filter, &tgid))
		return 0;

	child = (struct task_struct *)ctx[0];
	flags = ctx[1];
	tgid = child->tgid;

	/* A new thread does not get its own filter entry. */
	if (flags & CLONE_THREAD)
		return 0;

	/* BPF_NOEXIST: leave an existing entry for this tgid untouched. */
	bpf_map_update_elem(&task_filter, &tgid, &present, BPF_NOEXIST);
	return 0;
}
SEC("tp_btf/sched_switch")
int on_switch(u64 *ctx)
{

View File

@ -652,17 +652,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id,
nsinfo__mountns_exit(&nsc);
#ifdef HAVE_DEBUGINFOD_SUPPORT
if (realname == NULL) {
debuginfod_client* c = debuginfod_begin();
if (c != NULL) {
int fd = debuginfod_find_debuginfo(c,
(const unsigned char*)sbuild_id, 0,
&realname);
if (fd >= 0)
close(fd); /* retaining reference by realname */
debuginfod_end(c);
}
}
if (realname == NULL) {
debuginfod_client* c;
pr_debug("Downloading debug info with build id %s\n", sbuild_id);
c = debuginfod_begin();
if (c != NULL) {
int fd = debuginfod_find_debuginfo(c,
(const unsigned char*)sbuild_id, 0,
&realname);
if (fd >= 0)
close(fd); /* retaining reference by realname */
debuginfod_end(c);
}
}
#endif
out:

View File

@ -22,7 +22,7 @@
*
* The total_period is needed because by default auto-freq is used, so
* multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
* the total number of low level events, it is necessary to to sum all struct
* the total number of low level events, it is necessary to sum all struct
* perf_record_sample.period and stash the result in total_period.
*/
struct events_stats {

View File

@ -845,8 +845,13 @@ jit_process(struct perf_session *session,
if (jit_detect(filename, pid, nsi)) {
nsinfo__put(nsi);
// Strip //anon* mmaps if we processed a jitdump for this pid
if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
/*
* Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid
*/
if (jit_has_pid(machine, pid) &&
((strncmp(filename, "//anon", 6) == 0) ||
(strncmp(filename, "[anon:", 6) == 0) ||
(strncmp(filename, "/memfd:", 7) == 0)))
return 1;
return 0;

View File

@ -236,6 +236,7 @@ void machine__exit(struct machine *machine)
zfree(&machine->root_dir);
zfree(&machine->mmap_name);
zfree(&machine->current_tid);
zfree(&machine->kallsyms_filename);
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];

View File

@ -410,6 +410,11 @@ static const char * const snoop_access[] = {
"HitM",
};
/*
 * Printable names for the mem_snoopx bits, indexed by bit position:
 * perf_mem__snp_scnprintf() emits entry i when bit i of mem_snoopx is set.
 */
static const char * const snoopx_access[] = {
"Fwd",
"Peer",
};
int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
size_t i, l = 0;
@ -430,13 +435,20 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
}
l += scnprintf(out + l, sz - l, snoop_access[i]);
}
if (mem_info &&
(mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
m = 0;
if (mem_info)
m = mem_info->data_src.mem_snoopx;
for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) {
if (!(m & 0x1))
continue;
if (l) {
strcat(out, " or ");
l += 4;
}
l += scnprintf(out + l, sz - l, "Fwd");
l += scnprintf(out + l, sz - l, snoopx_access[i]);
}
if (*out == '\0')
@ -513,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
u64 op = data_src->mem_op;
u64 lvl = data_src->mem_lvl;
u64 snoop = data_src->mem_snoop;
u64 snoopx = data_src->mem_snoopx;
u64 lock = data_src->mem_lock;
u64 blk = data_src->mem_blk;
/*
@ -532,6 +545,12 @@ do { \
stats->tot_hitm++; \
} while (0)
#define PEER_INC(__f) \
do { \
stats->__f++; \
stats->tot_peer++; \
} while (0)
#define P(a, b) PERF_MEM_##a##_##b
stats->nr_entries++;
@ -555,12 +574,20 @@ do { \
if (lvl & P(LVL, IO)) stats->ld_io++;
if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
if (lvl & P(LVL, L2)) {
stats->ld_l2hit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
}
if (lvl & P(LVL, L3 )) {
if (snoop & P(SNOOP, HITM))
HITM_INC(lcl_hitm);
else
stats->ld_llchit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
}
if (lvl & P(LVL, LOC_RAM)) {
@ -585,10 +612,14 @@ do { \
if ((lvl & P(LVL, REM_CCE1)) ||
(lvl & P(LVL, REM_CCE2)) ||
mrem) {
if (snoop & P(SNOOP, HIT))
if (snoop & P(SNOOP, HIT)) {
stats->rmt_hit++;
else if (snoop & P(SNOOP, HITM))
} else if (snoop & P(SNOOP, HITM)) {
HITM_INC(rmt_hitm);
} else if (snoopx & P(SNOOPX, PEER)) {
stats->rmt_hit++;
PEER_INC(rmt_peer);
}
}
if ((lvl & P(LVL, MISS)))
@ -652,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->lcl_hitm += add->lcl_hitm;
stats->rmt_hitm += add->rmt_hitm;
stats->tot_hitm += add->tot_hitm;
stats->lcl_peer += add->lcl_peer;
stats->rmt_peer += add->rmt_peer;
stats->tot_peer += add->tot_peer;
stats->rmt_hit += add->rmt_hit;
stats->lcl_dram += add->lcl_dram;
stats->rmt_dram += add->rmt_dram;

View File

@ -78,6 +78,9 @@ struct c2c_stats {
u32 lcl_hitm; /* count of loads with local HITM */
u32 rmt_hitm; /* count of loads with remote HITM */
u32 tot_hitm; /* count of loads with local and remote HITM */
u32 lcl_peer; /* count of loads with local peer cache */
u32 rmt_peer; /* count of loads with remote peer cache */
u32 tot_peer; /* count of loads with local and remote peer cache */
u32 rmt_hit; /* count of loads with remote hit clean; */
u32 lcl_dram; /* count of loads miss to local DRAM */
u32 rmt_dram; /* count of loads miss to remote DRAM */

View File

@ -502,14 +502,14 @@ struct metricgroup_print_sys_idata {
bool details;
};
typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *);
struct metricgroup_iter_data {
metricgroup_sys_event_iter_fn fn;
pmu_event_iter_fn fn;
void *data;
};
static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data)
static int metricgroup__sys_event_iter(const struct pmu_event *pe,
const struct pmu_events_table *table,
void *data)
{
struct metricgroup_iter_data *d = data;
struct perf_pmu *pmu = NULL;
@ -522,13 +522,15 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data)
if (!pmu->id || strcmp(pmu->id, pe->compat))
continue;
return d->fn(pe, d->data);
return d->fn(pe, table, d->data);
}
return 0;
}
static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data)
static int metricgroup__print_sys_event_iter(const struct pmu_event *pe,
const struct pmu_events_table *table __maybe_unused,
void *data)
{
struct metricgroup_print_sys_idata *d = data;
@ -536,15 +538,40 @@ static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *d
d->details, d->groups, d->metriclist);
}
/*
 * Arguments handed to metricgroup__print_callback() through the opaque
 * data pointer of pmu_events_table_for_each_event().
 *
 * Apart from pmu_name, every member is forwarded unchanged to
 * metricgroup__print_pmu_event().
 */
struct metricgroup_print_data {
const char *pmu_name; /* if set, hybrid-PMU events whose pe->pmu differs are skipped */
struct strlist *metriclist;
char *filter;
struct rblist *groups;
bool metricgroups;
bool raw;
bool details;
};
/*
 * Per-event callback used when printing metrics from a pmu_events_table.
 *
 * @pe:    the event being visited.
 * @table: unused.
 * @vdata: a struct metricgroup_print_data with the print options.
 *
 * Returns 0 for events that are skipped, otherwise whatever
 * metricgroup__print_pmu_event() returns.
 */
static int metricgroup__print_callback(const struct pmu_event *pe,
				       const struct pmu_events_table *table __maybe_unused,
				       void *vdata)
{
	struct metricgroup_print_data *args = vdata;
	const char *wanted_pmu = args->pmu_name;

	/* Events without a metric expression are not metrics. */
	if (!pe->metric_expr)
		return 0;

	/* With a PMU name given, drop hybrid events that belong to another PMU. */
	if (wanted_pmu && perf_pmu__is_hybrid(pe->pmu)) {
		if (strcmp(wanted_pmu, pe->pmu))
			return 0;
	}

	return metricgroup__print_pmu_event(pe, args->metricgroups, args->filter,
					    args->raw, args->details, args->groups,
					    args->metriclist);
}
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
bool raw, bool details, const char *pmu_name)
{
const struct pmu_events_map *map = pmu_events_map__find();
const struct pmu_event *pe;
int i;
struct rblist groups;
struct rb_node *node, *next;
struct strlist *metriclist = NULL;
const struct pmu_events_table *table;
if (!metricgroups) {
metriclist = strlist__new(NULL, NULL);
@ -556,23 +583,22 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
groups.node_new = mep_new;
groups.node_cmp = mep_cmp;
groups.node_delete = mep_delete;
for (i = 0; map; i++) {
pe = &map->table[i];
table = pmu_events_table__find();
if (table) {
struct metricgroup_print_data data = {
.pmu_name = pmu_name,
.metriclist = metriclist,
.metricgroups = metricgroups,
.filter = filter,
.raw = raw,
.details = details,
.groups = &groups,
};
if (!pe->name && !pe->metric_group && !pe->metric_name)
break;
if (!pe->metric_expr)
continue;
if (pmu_name && perf_pmu__is_hybrid(pe->pmu) &&
strcmp(pmu_name, pe->pmu)) {
continue;
}
if (metricgroup__print_pmu_event(pe, metricgroups, filter,
raw, details, &groups,
metriclist) < 0)
return;
pmu_events_table_for_each_event(table,
metricgroup__print_callback,
&data);
}
{
struct metricgroup_iter_data data = {
.fn = metricgroup__print_sys_event_iter,
@ -850,16 +876,20 @@ struct metricgroup_add_iter_data {
bool metric_no_group;
struct metric *root_metric;
const struct visited_metric *visited;
const struct pmu_events_map *map;
const struct pmu_events_table *table;
};
static bool metricgroup__find_metric(const char *metric,
const struct pmu_events_table *table,
struct pmu_event *pe);
static int add_metric(struct list_head *metric_list,
const struct pmu_event *pe,
const char *modifier,
bool metric_no_group,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_map *map);
const struct pmu_events_table *table);
/**
* resolve_metric - Locate metrics within the root metric and recursively add
@ -874,7 +904,7 @@ static int add_metric(struct list_head *metric_list,
* metrics. When adding a root this argument is NULL.
* @visited: A singly linked list of metric names being added that is used to
* detect recursion.
* @map: The map that is searched for metrics, most commonly the table for the
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int resolve_metric(struct list_head *metric_list,
@ -882,13 +912,13 @@ static int resolve_metric(struct list_head *metric_list,
bool metric_no_group,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
struct hashmap_entry *cur;
size_t bkt;
struct to_resolve {
/* The metric to resolve. */
const struct pmu_event *pe;
struct pmu_event pe;
/*
* The key in the IDs map, this may differ from in case,
* etc. from pe->metric_name.
@ -902,16 +932,15 @@ static int resolve_metric(struct list_head *metric_list,
* the pending array.
*/
hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
const struct pmu_event *pe;
struct pmu_event pe;
pe = metricgroup__find_metric(cur->key, map);
if (pe) {
if (metricgroup__find_metric(cur->key, table, &pe)) {
pending = realloc(pending,
(pending_cnt + 1) * sizeof(struct to_resolve));
if (!pending)
return -ENOMEM;
pending[pending_cnt].pe = pe;
memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe));
pending[pending_cnt].key = cur->key;
pending_cnt++;
}
@ -926,8 +955,8 @@ static int resolve_metric(struct list_head *metric_list,
* context.
*/
for (i = 0; i < pending_cnt; i++) {
ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group,
root_metric, visited, map);
ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group,
root_metric, visited, table);
if (ret)
break;
}
@ -950,7 +979,7 @@ static int resolve_metric(struct list_head *metric_list,
* metrics. When adding a root this argument is NULL.
* @visited: A singly linked list of metric names being added that is used to
* detect recursion.
* @map: The map that is searched for metrics, most commonly the table for the
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int __add_metric(struct list_head *metric_list,
@ -960,7 +989,7 @@ static int __add_metric(struct list_head *metric_list,
int runtime,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
const struct visited_metric *vm;
int ret;
@ -1032,7 +1061,7 @@ static int __add_metric(struct list_head *metric_list,
} else {
/* Resolve referenced metrics. */
ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric,
&visited_node, map);
&visited_node, table);
}
if (ret) {
@ -1045,30 +1074,35 @@ static int __add_metric(struct list_head *metric_list,
return ret;
}
#define map_for_each_event(__pe, __idx, __map) \
if (__map) \
for (__idx = 0, __pe = &__map->table[__idx]; \
__pe->name || __pe->metric_group || __pe->metric_name; \
__pe = &__map->table[++__idx])
/* Search state passed to metricgroup__find_metric_callback(). */
struct metricgroup__find_metric_data {
const char *metric; /* name matched against pe->metric_name */
struct pmu_event *pe; /* out: receives a copy of the matching event */
};
#define map_for_each_metric(__pe, __idx, __map, __metric) \
map_for_each_event(__pe, __idx, __map) \
if (__pe->metric_expr && \
(match_metric(__pe->metric_group, __metric) || \
match_metric(__pe->metric_name, __metric)))
const struct pmu_event *metricgroup__find_metric(const char *metric,
const struct pmu_events_map *map)
static int metricgroup__find_metric_callback(const struct pmu_event *pe,
const struct pmu_events_table *table __maybe_unused,
void *vdata)
{
const struct pmu_event *pe;
int i;
struct metricgroup__find_metric_data *data = vdata;
map_for_each_event(pe, i, map) {
if (match_metric(pe->metric_name, metric))
return pe;
}
if (!match_metric(pe->metric_name, data->metric))
return 0;
return NULL;
memcpy(data->pe, pe, sizeof(*pe));
return 1;
}
static bool metricgroup__find_metric(const char *metric,
const struct pmu_events_table *table,
struct pmu_event *pe)
{
struct metricgroup__find_metric_data data = {
.metric = metric,
.pe = pe,
};
return pmu_events_table_for_each_event(table, metricgroup__find_metric_callback, &data)
? true : false;
}
static int add_metric(struct list_head *metric_list,
@ -1077,7 +1111,7 @@ static int add_metric(struct list_head *metric_list,
bool metric_no_group,
struct metric *root_metric,
const struct visited_metric *visited,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
int ret = 0;
@ -1085,7 +1119,7 @@ static int add_metric(struct list_head *metric_list,
if (!strstr(pe->metric_expr, "?")) {
ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0,
root_metric, visited, map);
root_metric, visited, table);
} else {
int j, count;
@ -1098,14 +1132,15 @@ static int add_metric(struct list_head *metric_list,
for (j = 0; j < count && !ret; j++)
ret = __add_metric(metric_list, pe, modifier, metric_no_group, j,
root_metric, visited, map);
root_metric, visited, table);
}
return ret;
}
static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
void *data)
const struct pmu_events_table *table __maybe_unused,
void *data)
{
struct metricgroup_add_iter_data *d = data;
int ret;
@ -1114,7 +1149,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
return 0;
ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group,
d->root_metric, d->visited, d->map);
d->root_metric, d->visited, d->table);
if (ret)
goto out;
@ -1152,6 +1187,33 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
return right_count - left_count;
}
/* Arguments and result flag for metricgroup__add_metric_callback(). */
struct metricgroup__add_metric_data {
struct list_head *list; /* list handed to add_metric() */
const char *metric_name; /* matched against both metric_group and metric_name */
const char *modifier; /* forwarded to add_metric() */
bool metric_no_group; /* forwarded to add_metric() */
bool has_match; /* out: set to true once any event matches metric_name */
};
/*
 * Per-event callback that adds every metric matching the requested name.
 *
 * @pe:    the event being visited.
 * @table: the table being iterated; passed on to add_metric().
 * @vdata: a struct metricgroup__add_metric_data.
 *
 * An event matches when it has a metric expression and either its metric
 * group or its metric name matches data->metric_name. On a match has_match
 * is set and the result of add_metric() is returned; otherwise 0.
 */
static int metricgroup__add_metric_callback(const struct pmu_event *pe,
					    const struct pmu_events_table *table,
					    void *vdata)
{
	struct metricgroup__add_metric_data *args = vdata;

	if (!pe->metric_expr)
		return 0;

	if (!match_metric(pe->metric_group, args->metric_name) &&
	    !match_metric(pe->metric_name, args->metric_name))
		return 0;

	args->has_match = true;

	return add_metric(args->list, pe, args->modifier, args->metric_no_group,
			  /*root_metric=*/NULL,
			  /*visited_metrics=*/NULL, table);
}
/**
* metricgroup__add_metric - Find and add a metric, or a metric group.
* @metric_name: The name of the metric or metric group. For example, "IPC"
@ -1162,32 +1224,37 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
* global. Grouping is the default but due to multiplexing the
* user may override.
* @metric_list: The list that the metric or metric group are added to.
* @map: The map that is searched for metrics, most commonly the table for the
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric(const char *metric_name, const char *modifier,
bool metric_no_group,
struct list_head *metric_list,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
const struct pmu_event *pe;
LIST_HEAD(list);
int i, ret;
int ret;
bool has_match = false;
/*
* Iterate over all metrics seeing if metric matches either the name or
* group. When it does add the metric to the list.
*/
map_for_each_metric(pe, i, map, metric_name) {
has_match = true;
ret = add_metric(&list, pe, modifier, metric_no_group,
/*root_metric=*/NULL,
/*visited_metrics=*/NULL, map);
{
struct metricgroup__add_metric_data data = {
.list = &list,
.metric_name = metric_name,
.modifier = modifier,
.metric_no_group = metric_no_group,
.has_match = false,
};
/*
* Iterate over all metrics seeing if metric matches either the
* name or group. When it does add the metric to the list.
*/
ret = pmu_events_table_for_each_event(table, metricgroup__add_metric_callback,
&data);
if (ret)
goto out;
}
has_match = data.has_match;
}
{
struct metricgroup_iter_data data = {
.fn = metricgroup__add_metric_sys_event_iter,
@ -1198,7 +1265,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier
.metric_no_group = metric_no_group,
.has_match = &has_match,
.ret = &ret,
.map = map,
.table = table,
},
};
@ -1227,12 +1294,12 @@ out:
* global. Grouping is the default but due to multiplexing the
* user may override.
* @metric_list: The list that metrics are added to.
* @map: The map that is searched for metrics, most commonly the table for the
* @table: The table that is searched for metrics, most commonly the table for the
* architecture perf is running upon.
*/
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
struct list_head *metric_list,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
char *list_itr, *list_copy, *metric_name, *modifier;
int ret, count = 0;
@ -1249,7 +1316,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
ret = metricgroup__add_metric(metric_name, modifier,
metric_no_group, metric_list,
map);
table);
if (ret == -EINVAL)
pr_err("Cannot find metric or group `%s'\n", metric_name);
@ -1440,7 +1507,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
bool metric_no_merge,
struct perf_pmu *fake_pmu,
struct rblist *metric_events_list,
const struct pmu_events_map *map)
const struct pmu_events_table *table)
{
struct evlist *combined_evlist = NULL;
LIST_HEAD(metric_list);
@ -1451,7 +1518,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
if (metric_events_list->nr_entries == 0)
metricgroup__rblist_init(metric_events_list);
ret = metricgroup__add_metric_list(str, metric_no_group,
&metric_list, map);
&metric_list, table);
if (ret)
goto out;
@ -1586,43 +1653,47 @@ int metricgroup__parse_groups(const struct option *opt,
struct rblist *metric_events)
{
struct evlist *perf_evlist = *(struct evlist **)opt->value;
const struct pmu_events_map *map = pmu_events_map__find();
const struct pmu_events_table *table = pmu_events_table__find();
return parse_groups(perf_evlist, str, metric_no_group,
metric_no_merge, NULL, metric_events, map);
metric_no_merge, NULL, metric_events, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_events_map *map,
const struct pmu_events_table *table,
const char *str,
bool metric_no_group,
bool metric_no_merge,
struct rblist *metric_events)
{
return parse_groups(evlist, str, metric_no_group,
metric_no_merge, &perf_pmu__fake, metric_events, map);
metric_no_merge, &perf_pmu__fake, metric_events, table);
}
/*
 * Per-event callback for metricgroup__has_metric().
 *
 * @pe:    the event being visited.
 * @table: unused.
 * @vdata: the metric name to look for, as a C string.
 *
 * Returns 1 when @pe has a metric expression and its metric name matches,
 * 0 otherwise.
 */
static int metricgroup__has_metric_callback(const struct pmu_event *pe,
					    const struct pmu_events_table *table __maybe_unused,
					    void *vdata)
{
	const char *wanted = vdata;

	if (pe->metric_expr && match_metric(pe->metric_name, wanted))
		return 1;

	return 0;
}
bool metricgroup__has_metric(const char *metric)
{
const struct pmu_events_map *map = pmu_events_map__find();
const struct pmu_event *pe;
int i;
const struct pmu_events_table *table = pmu_events_table__find();
if (!map)
if (!table)
return false;
for (i = 0; ; i++) {
pe = &map->table[i];
if (!pe->name && !pe->metric_group && !pe->metric_name)
break;
if (!pe->metric_expr)
continue;
if (match_metric(pe->metric_name, metric))
return true;
}
return false;
return pmu_events_table_for_each_event(table, metricgroup__has_metric_callback,
(void *)metric) ? true : false;
}
int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,

View File

@ -11,7 +11,6 @@ struct evlist;
struct evsel;
struct option;
struct rblist;
struct pmu_events_map;
struct cgroup;
/**
@ -70,10 +69,8 @@ int metricgroup__parse_groups(const struct option *opt,
bool metric_no_group,
bool metric_no_merge,
struct rblist *metric_events);
const struct pmu_event *metricgroup__find_metric(const char *metric,
const struct pmu_events_map *map);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_events_map *map,
const struct pmu_events_table *table,
const char *str,
bool metric_no_group,
bool metric_no_merge,

Some files were not shown because too many files have changed in this diff Show More