perf session: Load data directory files for analysis

Load data directory files and provide basic raw dump and aggregated
analysis support of data directories in report mode, still with no
memory consumption optimizations.

READER_MAX_SIZE was chosen based on measurements taken on different
machines with perf.data directories larger than 1GB. On machines with
a big core count (192 cores) the difference between 1MB and 2MB is
about 4%, while larger sizes (>2MB) perform about the same as 2MB.
On machines with a small core count (4-24) there are no differences
between sizes in the 1-16 MB range. So this constant is set to 2MB.

Suggested-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Riccardo Mancini <rickyman7@gmail.com>
Signed-off-by: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Riccardo Mancini <rickyman7@gmail.com>
Acked-by: Namhyung Kim <namhyung@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Antonov <alexander.antonov@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Budankov <abudankov@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/3f10c13a226c0ceb53e88a082f847b91c1ae2c25.1642440724.git.alexey.v.bayduraev@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Alexey Bayduraev 2022-01-17 21:34:35 +03:00 committed by Arnaldo Carvalho de Melo
parent b5f2511d4b
commit bb6be405c4

View file

@ -2186,6 +2186,8 @@ struct reader {
u64 file_pos;
u64 file_offset;
u64 head;
u64 size;
bool done;
struct zstd_data zstd_data;
struct decomp_data decomp_data;
};
@ -2303,6 +2305,7 @@ reader__read_event(struct reader *rd, struct perf_session *session,
if (skip)
size += skip;
rd->size += size;
rd->head += size;
rd->file_pos += size;
@ -2411,6 +2414,133 @@ static int __perf_session__process_events(struct perf_session *session)
return err;
}
/*
 * Upper bound on how much data is consumed from one reader before
 * moving on to the next one.
 *
 * Processing 2 MB of data from each reader in sequence,
 * because that's the way the ordered events sorting works
 * most efficiently.
 */
#define READER_MAX_SIZE (2 * 1024 * 1024)
/*
 * Read, merge and process the events of a perf.data directory.
 *
 * Version 1 of the directory data layout is assumed, where each data
 * file holds per-cpu data that has already been sorted by the kernel.
 *
 * One reader is set up for the main data file (slot 0) and one for every
 * non-empty file of the directory. The readers are then consumed in a
 * round-robin fashion, moving to the next reader once READER_MAX_SIZE
 * has been processed from the current one, until every reader hits EOF
 * or session_done() reports that processing should stop.
 *
 * Returns -ENOMEM when the reader array cannot be allocated; otherwise
 * the value of the first failing step or, when nothing failed, the
 * result of perf_session__flush_thread_stacks().
 */
static int __perf_session__process_dir_events(struct perf_session *session)
{
	struct perf_data *data = session->data;
	struct perf_tool *tool = session->tool;
	u64 total_size = perf_data__size(session->data);
	struct ui_progress prog;
	struct reader *rd, *cur;
	int nr_readers, active, idx, err;

	perf_tool__fill_defaults(tool);

	ui_progress__init_size(&prog, total_size, "Sorting events...");

	/* Slot 0 is the main data file, plus one slot per non-empty dir file. */
	nr_readers = 1;
	for (idx = 0; idx < data->dir.nr; idx++) {
		if (data->dir.files[idx].size)
			nr_readers++;
	}

	rd = zalloc(nr_readers * sizeof(*rd));
	if (rd == NULL)
		return -ENOMEM;

	/* Reader for the main data file. */
	cur = &rd[0];
	*cur = (struct reader) {
		.fd		 = perf_data__fd(session->data),
		.data_size	 = session->header.data_size,
		.data_offset	 = session->header.data_offset,
		.process	 = process_simple,
		.in_place_update = session->data->in_place_update,
	};

	err = reader__init(cur, NULL);
	if (err)
		goto out;

	err = reader__mmap(cur, session);
	if (err)
		goto out;

	active = 1;

	/* Readers for the directory files; empty files get no reader. */
	for (idx = 0; idx < data->dir.nr; idx++) {
		if (data->dir.files[idx].size == 0)
			continue;

		cur = &rd[active];
		*cur = (struct reader) {
			.fd		 = data->dir.files[idx].fd,
			.data_size	 = data->dir.files[idx].size,
			.data_offset	 = 0,
			.process	 = process_simple,
			.in_place_update = session->data->in_place_update,
		};

		err = reader__init(cur, NULL);
		if (err)
			goto out;

		err = reader__mmap(cur, session);
		if (err)
			goto out;

		active++;
	}

	/* Round-robin over the readers until all of them are exhausted. */
	idx = 0;
	while (active && !session_done()) {
		cur = &rd[idx];

		/* Already finished: step over it. */
		if (cur->done) {
			idx = (idx + 1) % nr_readers;
			continue;
		}

		/*
		 * Just reached EOF: retire it, the next pass through the
		 * loop advances the index.
		 */
		if (reader__eof(cur)) {
			cur->done = true;
			active--;
			continue;
		}

		session->active_decomp = &cur->decomp_data;

		err = reader__read_event(cur, session, &prog);
		if (err < 0)
			goto out;

		/* No data available in the current mapping: map again. */
		if (err == READER_NODATA) {
			err = reader__mmap(cur, session);
			if (err)
				goto out;
		}

		/* Chunk limit reached: restart the counter and switch reader. */
		if (cur->size >= READER_MAX_SIZE) {
			cur->size = 0;
			idx = (idx + 1) % nr_readers;
		}
	}

	err = ordered_events__flush(&session->ordered_events, OE_FLUSH__FINAL);
	if (err)
		goto out;

	err = perf_session__flush_thread_stacks(session);
out:
	ui_progress__finish();

	if (!tool->no_warn)
		perf_session__warn_about_errors(session);

	/*
	 * The perf.data output may be switched, so make ordered_events
	 * reusable.
	 */
	ordered_events__reinit(&session->ordered_events);

	session->one_mmap = false;
	session->active_decomp = &session->decomp_data;

	for (idx = 0; idx < nr_readers; idx++)
		reader__release_decomp(&rd[idx]);
	zfree(&rd);

	return err;
}
int perf_session__process_events(struct perf_session *session)
{
if (perf_session__register_idle_thread(session) < 0)
@ -2419,6 +2549,9 @@ int perf_session__process_events(struct perf_session *session)
if (perf_data__is_pipe(session->data))
return __perf_session__process_pipe_events(session);
if (perf_data__is_dir(session->data))
return __perf_session__process_dir_events(session);
return __perf_session__process_events(session);
}