timer_migration: Add tracepoints

The timer pull logic needs proper debugging aids. Add tracepoints so the
hierarchical idle machinery can be diagnosed.
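
Once booted with tracing available, the new events appear under the
timer_migration trace system in tracefs. A minimal userspace sketch for
enabling them and streaming the output, assuming tracefs is mounted at
/sys/kernel/tracing and root privileges:

  /*
   * Minimal sketch, assuming tracefs at /sys/kernel/tracing and root
   * privileges: enable every timer_migration event, then dump
   * trace_pipe to stdout.
   */
  #include <stdio.h>

  int main(void)
  {
  	FILE *en = fopen("/sys/kernel/tracing/events/timer_migration/enable", "w");
  	FILE *tp;
  	int c;

  	if (!en)
  		return 1;
  	fputs("1", en);
  	fclose(en);

  	tp = fopen("/sys/kernel/tracing/trace_pipe", "r");
  	if (!tp)
  		return 1;
  	while ((c = getc(tp)) != EOF)
  		putchar(c);
  	return 0;
  }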

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240222103403.31923-1-anna-maria@linutronix.de
Author:    Anna-Maria Behnsen <anna-maria@linutronix.de>
Date:      2024-02-22 11:34:03 +01:00
Committer: Thomas Gleixner <tglx@linutronix.de>
Commit:    36e40df35d (parent 7ee9887703)
3 changed files with 325 additions and 0 deletions

diff --git a/MAINTAINERS b/MAINTAINERS

@@ -17499,6 +17499,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: fs/timerfd.c
F: include/linux/time_namespace.h
F: include/linux/timer*
F: include/trace/events/timer*
F: kernel/time/*timer*
F: kernel/time/namespace.c

diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
new file mode 100644

@@ -0,0 +1,298 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM timer_migration
#if !defined(_TRACE_TIMER_MIGRATION_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_TIMER_MIGRATION_H
#include <linux/tracepoint.h>
/* Group events */
TRACE_EVENT(tmigr_group_set,
TP_PROTO(struct tmigr_group *group),
TP_ARGS(group),
TP_STRUCT__entry(
__field( void *, group )
__field( unsigned int, lvl )
__field( unsigned int, numa_node )
),
TP_fast_assign(
__entry->group = group;
__entry->lvl = group->level;
__entry->numa_node = group->numa_node;
),
TP_printk("group=%p lvl=%d numa=%d",
__entry->group, __entry->lvl, __entry->numa_node)
);
TRACE_EVENT(tmigr_connect_child_parent,
TP_PROTO(struct tmigr_group *child),
TP_ARGS(child),
TP_STRUCT__entry(
__field( void *, child )
__field( void *, parent )
__field( unsigned int, lvl )
__field( unsigned int, numa_node )
__field( unsigned int, num_children )
__field( u32, childmask )
),
TP_fast_assign(
__entry->child = child;
__entry->parent = child->parent;
__entry->lvl = child->parent->level;
__entry->numa_node = child->parent->numa_node;
__entry->num_children = child->parent->num_children;
__entry->childmask = child->childmask;
),
TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
__entry->child, __entry->childmask, __entry->parent,
__entry->lvl, __entry->numa_node, __entry->num_children)
);
TRACE_EVENT(tmigr_connect_cpu_parent,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc),
TP_STRUCT__entry(
__field( void *, parent )
__field( unsigned int, cpu )
__field( unsigned int, lvl )
__field( unsigned int, numa_node )
__field( unsigned int, num_children )
__field( u32, childmask )
),
TP_fast_assign(
__entry->parent = tmc->tmgroup;
__entry->cpu = tmc->cpuevt.cpu;
__entry->lvl = tmc->tmgroup->level;
__entry->numa_node = tmc->tmgroup->numa_node;
__entry->num_children = tmc->tmgroup->num_children;
__entry->childmask = tmc->childmask;
),
TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d",
__entry->cpu, __entry->childmask, __entry->parent,
__entry->lvl, __entry->numa_node, __entry->num_children)
);
DECLARE_EVENT_CLASS(tmigr_group_and_cpu,
TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
TP_ARGS(group, state, childmask),
TP_STRUCT__entry(
__field( void *, group )
__field( void *, parent )
__field( unsigned int, lvl )
__field( unsigned int, numa_node )
__field( u32, childmask )
__field( u8, active )
__field( u8, migrator )
),
TP_fast_assign(
__entry->group = group;
__entry->parent = group->parent;
__entry->lvl = group->level;
__entry->numa_node = group->numa_node;
__entry->childmask = childmask;
__entry->active = state.active;
__entry->migrator = state.migrator;
),
TP_printk("group=%p lvl=%d numa=%d active=%0x migrator=%0x "
"parent=%p childmask=%0x",
__entry->group, __entry->lvl, __entry->numa_node,
__entry->active, __entry->migrator,
__entry->parent, __entry->childmask)
);
DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_inactive,
TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
TP_ARGS(group, state, childmask)
);
DEFINE_EVENT(tmigr_group_and_cpu, tmigr_group_set_cpu_active,
TP_PROTO(struct tmigr_group *group, union tmigr_state state, u32 childmask),
TP_ARGS(group, state, childmask)
);
/* CPU events */
DECLARE_EVENT_CLASS(tmigr_cpugroup,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc),
TP_STRUCT__entry(
__field( u64, wakeup )
__field( void *, parent )
__field( unsigned int, cpu )
),
TP_fast_assign(
__entry->wakeup = tmc->wakeup;
__entry->parent = tmc->tmgroup;
__entry->cpu = tmc->cpuevt.cpu;
),
TP_printk("cpu=%d parent=%p wakeup=%llu", __entry->cpu, __entry->parent, __entry->wakeup)
);
DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_new_timer,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
DEFINE_EVENT(tmigr_cpugroup, tmigr_handle_remote_cpu,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
DECLARE_EVENT_CLASS(tmigr_idle,
TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
TP_ARGS(tmc, nextevt),
TP_STRUCT__entry(
__field( u64, nextevt )
__field( u64, wakeup )
__field( void *, parent )
__field( unsigned int, cpu )
),
TP_fast_assign(
__entry->nextevt = nextevt;
__entry->wakeup = tmc->wakeup;
__entry->parent = tmc->tmgroup;
__entry->cpu = tmc->cpuevt.cpu;
),
TP_printk("cpu=%d parent=%p nextevt=%llu wakeup=%llu",
__entry->cpu, __entry->parent, __entry->nextevt, __entry->wakeup)
);
DEFINE_EVENT(tmigr_idle, tmigr_cpu_idle,
TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
TP_ARGS(tmc, nextevt)
);
DEFINE_EVENT(tmigr_idle, tmigr_cpu_new_timer_idle,
TP_PROTO(struct tmigr_cpu *tmc, u64 nextevt),
TP_ARGS(tmc, nextevt)
);
TRACE_EVENT(tmigr_update_events,
TP_PROTO(struct tmigr_group *child, struct tmigr_group *group,
union tmigr_state childstate, union tmigr_state groupstate,
u64 nextevt),
TP_ARGS(child, group, childstate, groupstate, nextevt),
TP_STRUCT__entry(
__field( void *, child )
__field( void *, group )
__field( u64, nextevt )
__field( u64, group_next_expiry )
__field( u64, child_evt_expiry )
__field( unsigned int, group_lvl )
__field( unsigned int, child_evtcpu )
__field( u8, child_active )
__field( u8, group_active )
),
TP_fast_assign(
__entry->child = child;
__entry->group = group;
__entry->nextevt = nextevt;
__entry->group_next_expiry = group->next_expiry;
__entry->child_evt_expiry = child ? child->groupevt.nextevt.expires : 0;
__entry->group_lvl = group->level;
__entry->child_evtcpu = child ? child->groupevt.cpu : 0;
__entry->child_active = childstate.active;
__entry->group_active = groupstate.active;
),
TP_printk("child=%p group=%p group_lvl=%d child_active=%0x group_active=%0x "
"nextevt=%llu next_expiry=%llu child_evt_expiry=%llu child_evtcpu=%d",
__entry->child, __entry->group, __entry->group_lvl, __entry->child_active,
__entry->group_active,
__entry->nextevt, __entry->group_next_expiry, __entry->child_evt_expiry,
__entry->child_evtcpu)
);
TRACE_EVENT(tmigr_handle_remote,
TP_PROTO(struct tmigr_group *group),
TP_ARGS(group),
TP_STRUCT__entry(
__field( void *, group )
__field( unsigned int, lvl )
),
TP_fast_assign(
__entry->group = group;
__entry->lvl = group->level;
),
TP_printk("group=%p lvl=%d",
__entry->group, __entry->lvl)
);
#endif /* _TRACE_TIMER_MIGRATION_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
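
A note on the structure above: a DECLARE_EVENT_CLASS()/DEFINE_EVENT() pair
shares one entry layout, fast-assign block and TP_printk() format across
events, so each additional event costs only a short registration stanza. As
an illustrative sketch only (the event name tmigr_example is hypothetical,
not something this patch adds):

/* Hypothetical additional event reusing the tmigr_cpugroup class; entry
 * layout and print format are inherited from the class definition.
 */
DEFINE_EVENT(tmigr_cpugroup, tmigr_example,
	TP_PROTO(struct tmigr_cpu *tmc),
	TP_ARGS(tmc)
);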

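The source file below instantiates the events: exactly one translation unit
defines CREATE_TRACE_POINTS before including the header, which expands the
declarations above into definitions. Every other user includes the header
plainly and calls the generated trace_*() stubs, which remain
static-key-guarded no-ops while an event is disabled. A minimal sketch of
such an emitter (emit_example() is a hypothetical helper, and the private
timer_migration.h is only visible inside kernel/time/):

/* Hypothetical emitter elsewhere in kernel/time/: without
 * CREATE_TRACE_POINTS the include below yields declarations only.
 */
#include "timer_migration.h"			/* struct tmigr_group */
#include <trace/events/timer_migration.h>

static void emit_example(struct tmigr_group *group)
{
	trace_tmigr_handle_remote(group);	/* no-op unless the event is enabled */
}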
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c

@@ -14,6 +14,9 @@
#include "timer_migration.h"
#include "tick-internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/timer_migration.h>
/*
* The timer migration mechanism is built on a hierarchy of groups. The
* lowest level group contains CPUs, the next level groups of CPU groups
@@ -663,6 +666,8 @@ static bool tmigr_active_up(struct tmigr_group *group,
*/
group->groupevt.ignore = true;
trace_tmigr_group_set_cpu_active(group, newstate, childmask);
return walk_done;
}
@@ -672,6 +677,8 @@ static void __tmigr_cpu_activate(struct tmigr_cpu *tmc)
data.childmask = tmc->childmask;
trace_tmigr_cpu_active(tmc);
tmc->cpuevt.ignore = true;
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
@@ -829,6 +836,9 @@ check_toplvl:
data->firstexp = tmigr_next_groupevt_expires(group);
}
trace_tmigr_update_events(child, group, childstate, groupstate,
nextexp);
unlock:
raw_spin_unlock(&group->lock);
@@ -863,6 +873,8 @@ static u64 tmigr_new_timer(struct tmigr_cpu *tmc, u64 nextexp)
if (tmc->remote)
return KTIME_MAX;
trace_tmigr_cpu_new_timer(tmc);
tmc->cpuevt.ignore = false;
data.remote = false;
@@ -904,6 +916,8 @@ static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now,
return;
}
trace_tmigr_handle_remote_cpu(tmc);
tmc->remote = true;
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
@@ -984,6 +998,7 @@ static bool tmigr_handle_remote_up(struct tmigr_group *group,
childmask = data->childmask;
trace_tmigr_handle_remote(group);
again:
/*
* Handle the group only if @childmask is the migrator or if the
@@ -1206,6 +1221,7 @@ u64 tmigr_cpu_new_timer(u64 nextexp)
*/
WRITE_ONCE(tmc->wakeup, ret);
trace_tmigr_cpu_new_timer_idle(tmc, nextexp);
raw_spin_unlock(&tmc->lock);
return ret;
}
@@ -1298,6 +1314,8 @@ static bool tmigr_inactive_up(struct tmigr_group *group,
*/
WARN_ON_ONCE(data->firstexp != KTIME_MAX && group->parent);
trace_tmigr_group_set_cpu_inactive(group, newstate, childmask);
return walk_done;
}
@@ -1350,6 +1368,7 @@ u64 tmigr_cpu_deactivate(u64 nextexp)
*/
WRITE_ONCE(tmc->wakeup, ret);
trace_tmigr_cpu_idle(tmc, nextexp);
raw_spin_unlock(&tmc->lock);
return ret;
}
@@ -1467,6 +1486,7 @@ static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node,
/* Setup successful. Add it to the hierarchy */
list_add(&group->list, &tmigr_level_list[lvl]);
trace_tmigr_group_set(group);
return group;
}
@@ -1484,6 +1504,8 @@ static void tmigr_connect_child_parent(struct tmigr_group *child,
raw_spin_unlock(&parent->lock);
raw_spin_unlock_irq(&child->lock);
trace_tmigr_connect_child_parent(child);
/*
* To prevent inconsistent states, active children need to be active in
* the new parent as well. Inactive children are already marked inactive
@@ -1575,6 +1597,8 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node)
raw_spin_unlock_irq(&group->lock);
trace_tmigr_connect_cpu_parent(tmc);
/* There are no children that need to be connected */
continue;
} else {
@@ -1642,6 +1666,7 @@ static int tmigr_cpu_online(unsigned int cpu)
WRITE_ONCE(tmc->wakeup, KTIME_MAX);
}
raw_spin_lock_irq(&tmc->lock);
trace_tmigr_cpu_online(tmc);
tmc->idle = timer_base_is_idle();
if (!tmc->idle)
__tmigr_cpu_activate(tmc);
@@ -1681,6 +1706,7 @@ static int tmigr_cpu_offline(unsigned int cpu)
* offline; Therefore nextevt value is set to KTIME_MAX
*/
firstexp = __tmigr_cpu_deactivate(tmc, KTIME_MAX);
trace_tmigr_cpu_offline(tmc);
raw_spin_unlock_irq(&tmc->lock);
if (firstexp != KTIME_MAX) {