delayacct: support swapin delay accounting for swapping without blkio

Currently delayacct accounts swapin delay only for swapping that cause
blkio.  If we use zram for swapping, tools/accounting/getdelays can't
get any SWAP delay.

It's useful to get zram swapin delay information, for example to adjust
compress algorithm or /proc/sys/vm/swappiness.

Reference to PSI, it accounts any kind of swapping by doing its work in
swap_readpage(), no matter whether swapping causes blkio.  Let delayacct
do the similar work.

Link: https://lkml.kernel.org/r/20211112083813.8559-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Yang Yang 2022-01-19 18:10:02 -08:00 committed by Linus Torvalds
parent e83a4472bf
commit a3d5dc908a
4 changed files with 43 additions and 41 deletions

View File

@ -9,14 +9,6 @@
#include <uapi/linux/taskstats.h>
/*
* Per-task flags relevant to delay accounting
* maintained privately to avoid exhausting similar flags in sched.h:PF_*
* Used to set current->delays->flags
*/
#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */
#define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
raw_spinlock_t lock;
@ -37,13 +29,13 @@ struct task_delay_info {
* associated with the operation is added to XXX_delay.
* XXX_delay contains the accumulated delay time in nanoseconds.
*/
u64 blkio_start; /* Shared by blkio, swapin */
u64 blkio_start;
u64 blkio_delay; /* wait for sync block io completion */
u64 swapin_delay; /* wait for swapin block io completion */
u64 swapin_start;
u64 swapin_delay; /* wait for swapin */
u32 blkio_count; /* total count of the number of sync block */
/* io operations performed */
u32 swapin_count; /* total count of the number of swapin block */
/* io operations performed */
u32 swapin_count; /* total count of swapin */
u64 freepages_start;
u64 freepages_delay; /* wait for memory reclaim */
@ -79,14 +71,8 @@ extern void __delayacct_freepages_start(void);
extern void __delayacct_freepages_end(void);
extern void __delayacct_thrashing_start(void);
extern void __delayacct_thrashing_end(void);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
if (p->delays)
return (p->delays->flags & DELAYACCT_PF_BLKIO);
else
return 0;
}
extern void __delayacct_swapin_start(void);
extern void __delayacct_swapin_end(void);
static inline void delayacct_set_flag(struct task_struct *p, int flag)
{
@ -123,7 +109,6 @@ static inline void delayacct_blkio_start(void)
if (!static_branch_unlikely(&delayacct_key))
return;
delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
if (current->delays)
__delayacct_blkio_start();
}
@ -135,7 +120,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
if (p->delays)
__delayacct_blkio_end(p);
delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
}
static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
@ -169,6 +153,18 @@ static inline void delayacct_thrashing_end(void)
__delayacct_thrashing_end();
}
static inline void delayacct_swapin_start(void)
{
if (current->delays)
__delayacct_swapin_start();
}
static inline void delayacct_swapin_end(void)
{
if (current->delays)
__delayacct_swapin_end();
}
#else
static inline void delayacct_set_flag(struct task_struct *p, int flag)
{}
@ -199,6 +195,10 @@ static inline void delayacct_thrashing_start(void)
{}
static inline void delayacct_thrashing_end(void)
{}
static inline void delayacct_swapin_start(void)
{}
static inline void delayacct_swapin_end(void)
{}
#endif /* CONFIG_TASK_DELAY_ACCT */

View File

@ -100,19 +100,10 @@ void __delayacct_blkio_start(void)
*/
void __delayacct_blkio_end(struct task_struct *p)
{
struct task_delay_info *delays = p->delays;
u64 *total;
u32 *count;
if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
total = &delays->swapin_delay;
count = &delays->swapin_count;
} else {
total = &delays->blkio_delay;
count = &delays->blkio_count;
}
delayacct_end(&delays->lock, &delays->blkio_start, total, count);
delayacct_end(&p->delays->lock,
&p->delays->blkio_start,
&p->delays->blkio_delay,
&p->delays->blkio_count);
}
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@ -179,8 +170,7 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
unsigned long flags;
raw_spin_lock_irqsave(&tsk->delays->lock, flags);
ret = nsec_to_clock_t(tsk->delays->blkio_delay +
tsk->delays->swapin_delay);
ret = nsec_to_clock_t(tsk->delays->blkio_delay);
raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
return ret;
}
@ -210,3 +200,16 @@ void __delayacct_thrashing_end(void)
&current->delays->thrashing_delay,
&current->delays->thrashing_count);
}
void __delayacct_swapin_start(void)
{
current->delays->swapin_start = local_clock();
}
void __delayacct_swapin_end(void)
{
delayacct_end(&current->delays->lock,
&current->delays->swapin_start,
&current->delays->swapin_delay,
&current->delays->swapin_count);
}

View File

@ -3507,7 +3507,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry, vma, vmf->address);
swapcache = page;
@ -3555,7 +3554,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto unlock;
}
@ -3569,13 +3567,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* owner processes (which may be unknown at hwpoison time)
*/
ret = VM_FAULT_HWPOISON;
delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto out_release;
}
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
if (!locked) {
ret |= VM_FAULT_RETRY;
goto out_release;

View File

@ -25,6 +25,7 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
void end_swap_bio_write(struct bio *bio)
{
@ -370,6 +371,7 @@ int swap_readpage(struct page *page, bool synchronous)
* significant part of overall IO time.
*/
psi_memstall_enter(&pflags);
delayacct_swapin_start();
if (frontswap_load(page) == 0) {
SetPageUptodate(page);
@ -432,6 +434,7 @@ int swap_readpage(struct page *page, bool synchronous)
out:
psi_memstall_leave(&pflags);
delayacct_swapin_end();
return ret;
}