delayacct: track delays from IRQ/SOFTIRQ

Delay accounting does not track the delay caused by IRQ/SOFTIRQ, even
though IRQ/SOFTIRQ can have an obvious impact on the productivity of some
workloads, such as when workloads are running on a system which is busy
handling network IRQ/SOFTIRQ.

Knowing the delay caused by IRQ/SOFTIRQ could help users reduce it, for
example by setting interrupt affinity or task affinity, or by using kernel
threads for NAPI.  This is inspired by "sched/psi: Add PSI_IRQ to track
IRQ/SOFTIRQ pressure"[1].  Also fix some indentation problems in older
code.

Also update tools/accounting/getdelays.c to print the new IRQ statistics:
    / # ./getdelays -p 156 -di
    print delayacct stats ON
    printing IO accounting
    PID     156

    CPU             count     real total  virtual total    delay total  delay average
                       15       15836008       16218149      275700790         18.380ms
    IO              count    delay total  delay average
                        0              0          0.000ms
    SWAP            count    delay total  delay average
                        0              0          0.000ms
    RECLAIM         count    delay total  delay average
                        0              0          0.000ms
    THRASHING       count    delay total  delay average
                        0              0          0.000ms
    COMPACT         count    delay total  delay average
                        0              0          0.000ms
    WPCOPY          count    delay total  delay average
                       36        7586118          0.211ms
    IRQ             count    delay total  delay average
                       42         929161          0.022ms

[1] commit 52b1364ba0b1 ("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure")

Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: junhua huang <huang.junhua@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Yang Yang 2023-04-08 17:28:35 +08:00 committed by Andrew Morton
parent 29692fc92c
commit a3b2aeac9d
6 changed files with 58 additions and 15 deletions

View File

@ -16,6 +16,7 @@ d) memory reclaim
e) thrashing e) thrashing
f) direct compact f) direct compact
g) write-protect copy g) write-protect copy
h) IRQ/SOFTIRQ
and makes these statistics available to userspace through and makes these statistics available to userspace through
the taskstats interface. the taskstats interface.
@ -49,7 +50,7 @@ this structure. See
for a description of the fields pertaining to delay accounting. for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative It will generally be in the form of counters returning the cumulative
delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
cache, direct compact, write-protect copy etc. cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.
Taking the difference of two successive readings of a given Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay counter (say cpu_delay_total) for a task will give the delay
@ -118,7 +119,9 @@ Get sum of delays, since system boot, for all pids with tgid 5::
0 0 0.000ms 0 0 0.000ms
COMPACT count delay total delay average COMPACT count delay total delay average
0 0 0.000ms 0 0 0.000ms
WPCOPY count delay total delay average WPCOPY count delay total delay average
0 0 0.000ms
IRQ count delay total delay average
0 0 0.000ms 0 0 0.000ms
Get IO accounting for pid 1, it works only with -p:: Get IO accounting for pid 1, it works only with -p::

View File

@ -48,10 +48,13 @@ struct task_delay_info {
u64 wpcopy_start; u64 wpcopy_start;
u64 wpcopy_delay; /* wait for write-protect copy */ u64 wpcopy_delay; /* wait for write-protect copy */
u64 irq_delay; /* wait for IRQ/SOFTIRQ */
u32 freepages_count; /* total count of memory reclaim */ u32 freepages_count; /* total count of memory reclaim */
u32 thrashing_count; /* total count of thrash waits */ u32 thrashing_count; /* total count of thrash waits */
u32 compact_count; /* total count of memory compact */ u32 compact_count; /* total count of memory compact */
u32 wpcopy_count; /* total count of write-protect copy */ u32 wpcopy_count; /* total count of write-protect copy */
u32 irq_count; /* total count of IRQ/SOFTIRQ */
}; };
#endif #endif
@ -81,6 +84,7 @@ extern void __delayacct_compact_start(void);
extern void __delayacct_compact_end(void); extern void __delayacct_compact_end(void);
extern void __delayacct_wpcopy_start(void); extern void __delayacct_wpcopy_start(void);
extern void __delayacct_wpcopy_end(void); extern void __delayacct_wpcopy_end(void);
extern void __delayacct_irq(struct task_struct *task, u32 delta);
static inline void delayacct_tsk_init(struct task_struct *tsk) static inline void delayacct_tsk_init(struct task_struct *tsk)
{ {
@ -215,6 +219,15 @@ static inline void delayacct_wpcopy_end(void)
__delayacct_wpcopy_end(); __delayacct_wpcopy_end();
} }
/*
 * Account an IRQ/SOFTIRQ delay of @delta against @task.
 *
 * Does nothing when the delayacct_key static branch is off or when @task
 * has no delays structure; otherwise hands off to __delayacct_irq().
 */
static inline void delayacct_irq(struct task_struct *task, u32 delta)
{
	if (static_branch_unlikely(&delayacct_key) && task->delays)
		__delayacct_irq(task, delta);
}
#else #else
static inline void delayacct_init(void) static inline void delayacct_init(void)
{} {}
@ -253,6 +266,8 @@ static inline void delayacct_wpcopy_start(void)
{} {}
static inline void delayacct_wpcopy_end(void) static inline void delayacct_wpcopy_end(void)
{} {}
/* Stub used when CONFIG_TASK_DELAY_ACCT is not set: nothing to account. */
static inline void delayacct_irq(struct task_struct *task, u32 delta)
{
}
#endif /* CONFIG_TASK_DELAY_ACCT */ #endif /* CONFIG_TASK_DELAY_ACCT */

View File

@ -34,7 +34,7 @@
*/ */
#define TASKSTATS_VERSION 13 #define TASKSTATS_VERSION 14
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */ * in linux/sched.h */
@ -198,6 +198,10 @@ struct taskstats {
/* v13: Delay waiting for write-protect copy */ /* v13: Delay waiting for write-protect copy */
__u64 wpcopy_count; __u64 wpcopy_count;
__u64 wpcopy_delay_total; __u64 wpcopy_delay_total;
/* v14: Delay waiting for IRQ/SOFTIRQ */
__u64 irq_count;
__u64 irq_delay_total;
}; };

View File

@ -179,12 +179,15 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
tmp = d->irq_delay_total + tsk->delays->irq_delay;
d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count; d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count; d->swapin_count += tsk->delays->swapin_count;
d->freepages_count += tsk->delays->freepages_count; d->freepages_count += tsk->delays->freepages_count;
d->thrashing_count += tsk->delays->thrashing_count; d->thrashing_count += tsk->delays->thrashing_count;
d->compact_count += tsk->delays->compact_count; d->compact_count += tsk->delays->compact_count;
d->wpcopy_count += tsk->delays->wpcopy_count; d->wpcopy_count += tsk->delays->wpcopy_count;
d->irq_count += tsk->delays->irq_count;
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return 0; return 0;
@ -274,3 +277,14 @@ void __delayacct_wpcopy_end(void)
&current->delays->wpcopy_delay, &current->delays->wpcopy_delay,
&current->delays->wpcopy_count); &current->delays->wpcopy_count);
} }
/*
 * Record one IRQ/SOFTIRQ delay sample for @task: bump irq_count and add
 * @delta to the accumulated irq_delay.  Both updates happen under the
 * task's delays lock, taken with the irqsave/irqrestore variants.
 *
 * task->delays is dereferenced unconditionally here; the delayacct_irq()
 * wrapper checks it before calling.
 */
void __delayacct_irq(struct task_struct *task, u32 delta)
{
	unsigned long irqflags;

	raw_spin_lock_irqsave(&task->delays->lock, irqflags);
	task->delays->irq_count++;
	task->delays->irq_delay += delta;
	raw_spin_unlock_irqrestore(&task->delays->lock, irqflags);
}

View File

@ -704,6 +704,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
rq->prev_irq_time += irq_delta; rq->prev_irq_time += irq_delta;
delta -= irq_delta; delta -= irq_delta;
psi_account_irqtime(rq->curr, irq_delta); psi_account_irqtime(rq->curr, irq_delta);
delayacct_irq(rq->curr, irq_delta);
#endif #endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
if (static_key_false((&paravirt_steal_rq_enabled))) { if (static_key_false((&paravirt_steal_rq_enabled))) {

View File

@ -198,17 +198,19 @@ static void print_delayacct(struct taskstats *t)
printf("\n\nCPU %15s%15s%15s%15s%15s\n" printf("\n\nCPU %15s%15s%15s%15s%15s\n"
" %15llu%15llu%15llu%15llu%15.3fms\n" " %15llu%15llu%15llu%15llu%15.3fms\n"
"IO %15s%15s%15s\n" "IO %15s%15s%15s\n"
" %15llu%15llu%15.3fms\n" " %15llu%15llu%15.3fms\n"
"SWAP %15s%15s%15s\n" "SWAP %15s%15s%15s\n"
" %15llu%15llu%15.3fms\n" " %15llu%15llu%15.3fms\n"
"RECLAIM %12s%15s%15s\n" "RECLAIM %12s%15s%15s\n"
" %15llu%15llu%15.3fms\n" " %15llu%15llu%15.3fms\n"
"THRASHING%12s%15s%15s\n" "THRASHING%12s%15s%15s\n"
" %15llu%15llu%15.3fms\n" " %15llu%15llu%15.3fms\n"
"COMPACT %12s%15s%15s\n" "COMPACT %12s%15s%15s\n"
" %15llu%15llu%15.3fms\n" " %15llu%15llu%15.3fms\n"
"WPCOPY %12s%15s%15s\n" "WPCOPY %12s%15s%15s\n"
" %15llu%15llu%15.3fms\n", " %15llu%15llu%15.3fms\n"
"IRQ %15s%15s%15s\n"
" %15llu%15llu%15.3fms\n",
"count", "real total", "virtual total", "count", "real total", "virtual total",
"delay total", "delay average", "delay total", "delay average",
(unsigned long long)t->cpu_count, (unsigned long long)t->cpu_count,
@ -219,27 +221,31 @@ static void print_delayacct(struct taskstats *t)
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->blkio_count, (unsigned long long)t->blkio_count,
(unsigned long long)t->blkio_delay_total, (unsigned long long)t->blkio_delay_total,
average_ms((double)t->blkio_delay_total, t->blkio_count), average_ms((double)t->blkio_delay_total, t->blkio_count),
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->swapin_count, (unsigned long long)t->swapin_count,
(unsigned long long)t->swapin_delay_total, (unsigned long long)t->swapin_delay_total,
average_ms((double)t->swapin_delay_total, t->swapin_count), average_ms((double)t->swapin_delay_total, t->swapin_count),
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->freepages_count, (unsigned long long)t->freepages_count,
(unsigned long long)t->freepages_delay_total, (unsigned long long)t->freepages_delay_total,
average_ms((double)t->freepages_delay_total, t->freepages_count), average_ms((double)t->freepages_delay_total, t->freepages_count),
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_count,
(unsigned long long)t->thrashing_delay_total, (unsigned long long)t->thrashing_delay_total,
average_ms((double)t->thrashing_delay_total, t->thrashing_count), average_ms((double)t->thrashing_delay_total, t->thrashing_count),
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->compact_count, (unsigned long long)t->compact_count,
(unsigned long long)t->compact_delay_total, (unsigned long long)t->compact_delay_total,
average_ms((double)t->compact_delay_total, t->compact_count), average_ms((double)t->compact_delay_total, t->compact_count),
"count", "delay total", "delay average", "count", "delay total", "delay average",
(unsigned long long)t->wpcopy_count, (unsigned long long)t->wpcopy_count,
(unsigned long long)t->wpcopy_delay_total, (unsigned long long)t->wpcopy_delay_total,
average_ms((double)t->wpcopy_delay_total, t->wpcopy_count)); average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
"count", "delay total", "delay average",
(unsigned long long)t->irq_count,
(unsigned long long)t->irq_delay_total,
average_ms((double)t->irq_delay_total, t->irq_count));
} }
static void task_context_switch_counts(struct taskstats *t) static void task_context_switch_counts(struct taskstats *t)