perf cs-etm: Handle PERF_RECORD_AUX_OUTPUT_HW_ID packet

When using dynamically assigned CoreSight trace IDs the drivers can output
the ID / CPU association as a PERF_RECORD_AUX_OUTPUT_HW_ID packet.

Update cs-etm decoder to handle this packet by setting the CPU/Trace ID
mapping.

Reviewed-by: James Clark <james.clark@arm.com>
Signed-off-by: Mike Leach <mike.leach@linaro.org>
Acked-by: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Darren Hart <darren@os.amperecomputing.com>
Cc: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230331055645.26918-2-mike.leach@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Mike Leach 2023-03-31 06:56:45 +01:00 committed by Arnaldo Carvalho de Melo
parent e5fa5b4110
commit b6521ea2a0
3 changed files with 250 additions and 18 deletions

View File

@ -31,6 +31,9 @@
*/
#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
/* Value to set for unused trace ID values */
#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
/*
* Below are the definition of bit offsets for perf option, and works as
* arbitrary values for all ETM versions.
@ -55,4 +58,16 @@
#define ETM4_CFG_BIT_RETSTK 12
#define ETM4_CFG_BIT_VMID_OPT 15
/*
* Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
* Used to associate a CPU with the CoreSight Trace ID.
* [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
* [59:08] - Unused (SBZ)
* [63:60] - Version
*/
#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
#define CS_AUX_HW_ID_CURR_VERSION 0
#endif

View File

@ -668,6 +668,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
switch (t_params->protocol) {
case CS_ETM_PROTO_ETMV3:
case CS_ETM_PROTO_PTM:
csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK);
cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
OCSD_BUILTIN_DCD_ETMV3 :
@ -675,11 +676,13 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
trace_config = &config_etmv3;
break;
case CS_ETM_PROTO_ETMV4i:
csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
trace_config = &trace_config_etmv4;
break;
case CS_ETM_PROTO_ETE:
csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK);
cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete);
decoder->decoder_name = OCSD_BUILTIN_DCD_ETE;
trace_config = &trace_config_ete;
@ -688,6 +691,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
return -1;
}
/* if the CPU has no trace ID associated, no decoder needed */
if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
return 0;
if (d_params->operation == CS_ETM_OPERATION_DECODE) {
if (ocsd_dt_create_decoder(decoder->dcd_tree,
decoder->decoder_name,

View File

@ -220,6 +220,143 @@ static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
return 0;
}
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
CORESIGHT_TRACE_ID_VAL_MASK);
break;
case __perf_cs_etmv4_magic:
case __perf_cs_ete_magic:
*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
CORESIGHT_TRACE_ID_VAL_MASK);
break;
default:
return -EINVAL;
}
return 0;
}
/*
* update metadata trace ID from the value found in the AUX_HW_INFO packet.
* This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
*/
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
break;
case __perf_cs_etmv4_magic:
case __perf_cs_ete_magic:
cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
break;
default:
return -EINVAL;
}
return 0;
}
/*
* FIELD_GET (linux/bitfield.h) not available outside kernel code,
* and the header contains too many dependencies to just copy over,
* so roll our own based on the original
*/
#define __bf_shf(x) (__builtin_ffsll(x) - 1)
#define FIELD_GET(_mask, _reg) \
({ \
(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
})
/*
* Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
*
* The payload associates the Trace ID and the CPU.
* The routine is tolerant of seeing multiple packets with the same association,
* but a CPU / Trace ID association changing during a session is an error.
*/
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
union perf_event *event)
{
struct cs_etm_auxtrace *etm;
struct perf_sample sample;
struct int_node *inode;
struct evsel *evsel;
u64 *cpu_data;
u64 hw_id;
int cpu, version, err;
u8 trace_chan_id, curr_chan_id;
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
/* check that we can handle this version */
if (version > CS_AUX_HW_ID_CURR_VERSION)
return -EINVAL;
/* get access to the etm metadata */
etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
if (!etm || !etm->metadata)
return -EINVAL;
/* parse the sample to get the CPU */
evsel = evlist__event2evsel(session->evlist, event);
if (!evsel)
return -EINVAL;
err = evsel__parse_sample(evsel, event, &sample);
if (err)
return err;
cpu = sample.cpu;
if (cpu == -1) {
/* no CPU in the sample - possibly recorded with an old version of perf */
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
return -EINVAL;
}
/* See if the ID is mapped to a CPU, and it matches the current CPU */
inode = intlist__find(traceid_list, trace_chan_id);
if (inode) {
cpu_data = inode->priv;
if ((int)cpu_data[CS_ETM_CPU] != cpu) {
pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
return -EINVAL;
}
/* check that the mapped ID matches */
err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
if (err)
return err;
if (curr_chan_id != trace_chan_id) {
pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
return -EINVAL;
}
/* mapped and matched - return OK */
return 0;
}
/* not one we've seen before - lets map it */
cpu_data = etm->metadata[cpu];
err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
if (err)
return err;
/*
* if we are picking up the association from the packet, need to plug
* the correct trace ID into the metadata for setting up decoders later.
*/
err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
return err;
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
u8 trace_chan_id)
{
@ -2668,11 +2805,16 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
}
/*
* In per-thread mode, CPU is set to -1, but TID will be set instead. See
* auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match.
* In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
* auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
* CPU as we set this always for the AUX_OUTPUT_HW_ID event.
* So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
* Return 'not found' if mismatch.
*/
if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) ||
auxtrace_event->cpu != sample->cpu)
if (auxtrace_event->cpu == (__u32) -1) {
if (auxtrace_event->tid != sample->tid)
return 1;
} else if (auxtrace_event->cpu != sample->cpu)
return 1;
if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
@ -2721,6 +2863,17 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
return 1;
}
static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
u64 offset __maybe_unused, void *data __maybe_unused)
{
/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
(*(int *)data)++; /* increment found count */
return cs_etm__process_aux_output_hw_id(session, event);
}
return 0;
}
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
u64 offset __maybe_unused, void *data __maybe_unused)
{
@ -2839,13 +2992,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
trace_chan_id = (u8)((metadata[i][CS_ETM_ETMTRACEIDR]) &
CORESIGHT_TRACE_ID_VAL_MASK);
metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
break;
case __perf_cs_etmv4_magic:
case __perf_cs_ete_magic:
trace_chan_id = (u8)((metadata[i][CS_ETMV4_TRCTRACEIDR]) &
CORESIGHT_TRACE_ID_VAL_MASK);
metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
break;
default:
/* unknown magic number */
@ -2858,6 +3011,35 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
return 0;
}
/*
* If we found AUX_HW_ID packets, then set any metadata marked as unused to the
* unused value to reduce the number of unneeded decoders created.
*/
static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
{
u64 cs_etm_magic;
int i;
for (i = 0; i < num_cpu; i++) {
cs_etm_magic = metadata[i][CS_ETM_MAGIC];
switch (cs_etm_magic) {
case __perf_cs_etmv3_magic:
if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
break;
case __perf_cs_etmv4_magic:
case __perf_cs_ete_magic:
if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
break;
default:
/* unknown magic number */
return -EINVAL;
}
}
return 0;
}
int cs_etm__process_auxtrace_info_full(union perf_event *event,
struct perf_session *session)
{
@ -2869,6 +3051,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
int priv_size = 0;
int num_cpu;
int err = 0;
int aux_hw_id_found;
int i, j;
u64 *ptr = NULL;
u64 **metadata = NULL;
@ -3017,8 +3200,43 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_delete_thread;
/* before aux records are queued, need to map metadata to trace IDs */
err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
/*
* Map Trace ID values to CPU metadata.
*
* Trace metadata will always contain Trace ID values from the legacy algorithm. If the
* files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
* ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
*
* The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
* the same IDs as the old algorithm as far as is possible, unless there are clashes
* in which case a different value will be used. This means an older perf may still
* be able to record and read files generate on a newer system.
*
* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
* those packets. If they are there then the values will be mapped and plugged into
* the metadata. We then set any remaining metadata values with the used flag to a
* value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
*
* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
* then we map Trace ID values to CPU directly from the metadata - clearing any unused
* flags if present.
*/
/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
aux_hw_id_found = 0;
err = perf_session__peek_events(session, session->header.data_offset,
session->header.data_size,
cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
if (err)
goto err_delete_thread;
/* if HW ID found then clear any unused metadata ID values */
if (aux_hw_id_found)
err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
/* otherwise, this is a file with metadata values only, map from metadata */
else
err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
if (err)
goto err_delete_thread;
@ -3027,14 +3245,6 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
goto err_delete_thread;
etm->data_queued = etm->queues.populated;
/*
* Print warning in pipe mode, see cs_etm__process_auxtrace_event() and
* cs_etm__queue_aux_fragment() for details relating to limitations.
*/
if (!etm->data_queued)
pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n"
"Continuing with best effort decoding in piped mode.\n\n");
return 0;
err_delete_thread: