perf/x86/intel/pt: Add support for address range filtering in PT

Newer versions of Intel PT support address ranges, which can be used to
define IP address range-based filters or TraceSTOP regions. The number of
ranges is enumerated via CPUID.

This patch implements PMU callbacks and related low-level code to allow
filter validation, configuration and programming into the hardware.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1461771888-10409-7-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Alexander Shishkin 2016-04-27 18:44:47 +03:00 committed by Ingo Molnar
parent 375637bc52
commit eadf48cab4
2 changed files with 194 additions and 11 deletions

View File

@ -265,6 +265,75 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
/*
 * Per-range hardware resources: the pair of MSRs holding the bounds of
 * address range N and the bit offset of that range's configuration field
 * inside RTIT_CTL. Indexed by range number.
 */
static const struct pt_address_range {
	unsigned long	msr_a;
	unsigned long	msr_b;
	unsigned int	reg_off;
} pt_address_ranges[] = {
	{ .msr_a = MSR_IA32_RTIT_ADDR0_A, .msr_b = MSR_IA32_RTIT_ADDR0_B,
	  .reg_off = RTIT_CTL_ADDR0_OFFSET },
	{ .msr_a = MSR_IA32_RTIT_ADDR1_A, .msr_b = MSR_IA32_RTIT_ADDR1_B,
	  .reg_off = RTIT_CTL_ADDR1_OFFSET },
	{ .msr_a = MSR_IA32_RTIT_ADDR2_A, .msr_b = MSR_IA32_RTIT_ADDR2_B,
	  .reg_off = RTIT_CTL_ADDR2_OFFSET },
	{ .msr_a = MSR_IA32_RTIT_ADDR3_A, .msr_b = MSR_IA32_RTIT_ADDR3_B,
	  .reg_off = RTIT_CTL_ADDR3_OFFSET },
};
static u64 pt_config_filters(struct perf_event *event)
{
struct pt_filters *filters = event->hw.addr_filters;
struct pt *pt = this_cpu_ptr(&pt_ctx);
unsigned int range = 0;
u64 rtit_ctl = 0;
if (!filters)
return 0;
perf_event_addr_filters_sync(event);
for (range = 0; range < filters->nr_filters; range++) {
struct pt_filter *filter = &filters->filter[range];
/*
* Note, if the range has zero start/end addresses due
* to its dynamic object not being loaded yet, we just
* go ahead and program zeroed range, which will simply
* produce no data. Note^2: if executable code at 0x0
* is a concern, we can set up an "invalid" configuration
* such as msr_b < msr_a.
*/
/* avoid redundant msr writes */
if (pt->filters.filter[range].msr_a != filter->msr_a) {
wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
pt->filters.filter[range].msr_a = filter->msr_a;
}
if (pt->filters.filter[range].msr_b != filter->msr_b) {
wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
pt->filters.filter[range].msr_b = filter->msr_b;
}
rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
}
return rtit_ctl;
}
static void pt_config(struct perf_event *event)
{
u64 reg;
@ -274,7 +343,8 @@ static void pt_config(struct perf_event *event)
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
reg = pt_config_filters(event);
reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
@ -921,6 +991,82 @@ static void pt_buffer_free_aux(void *data)
kfree(buf);
}
/**
 * pt_addr_filters_init() - allocate per-event address filter state
 * @event:	perf event being initialised
 *
 * Does nothing when the hardware reports no address ranges. Otherwise a
 * zeroed &struct pt_filters is allocated (on the event CPU's node when the
 * event is bound to a CPU) and, for an event with a parent, seeded with a
 * copy of the parent's filter state.
 *
 * Return:	0 on success, -ENOMEM if the allocation fails.
 */
static int pt_addr_filters_init(struct perf_event *event)
{
	struct pt_filters *filters;
	int node = -1;

	if (event->cpu != -1)
		node = cpu_to_node(event->cpu);

	if (!pt_cap_get(PT_CAP_num_address_ranges))
		return 0;

	filters = kzalloc_node(sizeof(*filters), GFP_KERNEL, node);
	if (!filters)
		return -ENOMEM;

	/* start out from the parent's filter configuration, if there is one */
	if (event->parent)
		memcpy(filters, event->parent->hw.addr_filters,
		       sizeof(*filters));

	event->hw.addr_filters = filters;

	return 0;
}
/**
 * pt_addr_filters_fini() - release per-event address filter state
 * @event:	perf event being torn down
 *
 * Frees the filter state attached to @event and clears the pointer so
 * that it cannot be used or freed again.
 */
static void pt_addr_filters_fini(struct perf_event *event)
{
	kfree(event->hw.addr_filters);
	event->hw.addr_filters = NULL;
}
/**
 * pt_event_addr_filters_validate() - check a list of address filters
 * @filters:	list of &struct perf_addr_filter entries to validate
 *
 * Return:	0 if every filter can be handled, -EOPNOTSUPP for a
 *		non-range filter or when there are more filters than the
 *		hardware has address ranges, -EINVAL for a filter without
 *		an inode whose offset is not a kernel address.
 */
static int pt_event_addr_filters_validate(struct list_head *filters)
{
	struct perf_addr_filter *filter;
	int nr_ranges = 0;

	list_for_each_entry(filter, filters, entry) {
		/* PT doesn't support single address triggers */
		if (!filter->range)
			return -EOPNOTSUPP;

		/* a filter with no backing inode must be a kernel address */
		if (!filter->inode && !kernel_ip(filter->offset))
			return -EINVAL;

		/* each accepted filter consumes one hardware range */
		nr_ranges++;
		if (nr_ranges > pt_cap_get(PT_CAP_num_address_ranges))
			return -EOPNOTSUPP;
	}

	return 0;
}
/**
 * pt_event_addr_filters_sync() - translate perf filters into PT ranges
 * @event:	perf event whose filter list is to be translated
 *
 * Walks the event's address filter list and fills in one &struct pt_filter
 * slot per entry (range bounds and config value), then records how many
 * slots were filled. Does nothing if the event has no filter state.
 */
static void pt_event_addr_filters_sync(struct perf_event *event)
{
	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
	struct pt_filters *filters = event->hw.addr_filters;
	unsigned long *offs = event->addr_filters_offs;
	struct perf_addr_filter *filter;
	int nr = 0;

	if (!filters)
		return;

	list_for_each_entry(filter, &head->list, entry) {
		struct pt_filter *slot = &filters->filter[nr];
		unsigned long start = 0, end = 0;

		/*
		 * An inode-backed filter with a zero offset entry is left
		 * as an all-zero range; everything else gets the offset
		 * applied.
		 *
		 * NOTE(review): the end is start + size, i.e. one past the
		 * last byte of the region. Confirm against the hardware
		 * documentation whether RTIT_ADDRn_B is an inclusive bound.
		 */
		if (!filter->inode || offs[nr]) {
			start = filter->offset + offs[nr];
			end = start + filter->size;
		}

		slot->msr_a = start;
		slot->msr_b = end;
		/* 1 when filter->filter is set, 2 otherwise */
		slot->config = filter->filter ? 1 : 2;

		nr++;
	}

	filters->nr_filters = nr;
}
/**
* intel_pt_interrupt() - PT PMI handler
*/
@ -1128,6 +1274,7 @@ static void pt_event_read(struct perf_event *event)
/*
 * Event destructor: release the address filter state, then drop the
 * x86_lbr_exclusive_pt claim taken at event init time.
 */
static void pt_event_destroy(struct perf_event *event)
{
	pt_addr_filters_fini(event);
	x86_del_exclusive(x86_lbr_exclusive_pt);
}
@ -1142,6 +1289,11 @@ static int pt_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;
if (pt_addr_filters_init(event)) {
x86_del_exclusive(x86_lbr_exclusive_pt);
return -ENOMEM;
}
event->destroy = pt_event_destroy;
return 0;
@ -1195,16 +1347,21 @@ static __init int pt_init(void)
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
pt_pmu.pmu.add = pt_event_add;
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
pt_pmu.pmu.attr_groups = pt_attr_groups;
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
pt_pmu.pmu.event_init = pt_event_init;
pt_pmu.pmu.add = pt_event_add;
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
pt_pmu.pmu.nr_addr_filters =
pt_cap_get(PT_CAP_num_address_ranges);
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
return ret;

View File

@ -140,14 +140,40 @@ struct pt_buffer {
struct topa_entry *topa_index[0];
};
/* Capacity of the filter array in struct pt_filters */
#define PT_FILTERS_NUM	4

/**
 * struct pt_filter - configuration of a single IP range filter
 * @msr_a:	start of the range; value for RTIT_ADDRn_A
 * @msr_b:	end of the range; value for RTIT_ADDRn_B
 * @config:	value of this range's 4-bit field in RTIT_CTL
 */
struct pt_filter {
	unsigned long	msr_a;
	unsigned long	msr_b;
	unsigned long	config;
};
/**
 * struct pt_filters - set of IP range filters for one context
 * @filter:	storage for up to PT_FILTERS_NUM range filters
 * @nr_filters:	how many leading entries of @filter are in use
 */
struct pt_filters {
	struct pt_filter	filter[PT_FILTERS_NUM];
	unsigned int		nr_filters;
};
/**
 * struct pt - per-cpu pt context
 * @handle:	perf output handle
 * @filters:	filter values last configured on this cpu
 * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
 * @vmx_on:	1 if VMX is ON on this cpu
 */
struct pt {
	struct perf_output_handle	handle;
	struct pt_filters		filters;
	int				handle_nmi;
	int				vmx_on;
};