md/bitmap: factor out a helper to set timeout

Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:

1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.

2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.

This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.

Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
This commit is contained in:
Yu Kuai 2023-05-23 10:10:16 +08:00 committed by Song Liu
parent c333673a78
commit 4eeb6535cd

View file

@ -1234,11 +1234,22 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
sector_t offset, sector_t *blocks,
int create);
static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
bool force)
{
struct md_thread *thread = mddev->thread;
if (!thread)
return;
if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
thread->timeout = timeout;
}
/*
* bitmap daemon -- periodically wakes up to clean bits and flush pages
* out to disk
*/
void md_bitmap_daemon_work(struct mddev *mddev)
{
struct bitmap *bitmap;
@ -1262,7 +1273,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
bitmap->daemon_lastrun = jiffies;
if (bitmap->allclean) {
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
goto done;
}
bitmap->allclean = 1;
@ -1359,8 +1370,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
done:
if (bitmap->allclean == 0)
mddev->thread->timeout =
mddev->bitmap_info.daemon_sleep;
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
mutex_unlock(&mddev->bitmap_info.mutex);
}
@ -1821,8 +1831,7 @@ void md_bitmap_destroy(struct mddev *mddev)
mddev->bitmap = NULL; /* disconnect from the md device */
spin_unlock(&mddev->lock);
mutex_unlock(&mddev->bitmap_info.mutex);
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
md_bitmap_free(bitmap);
}
@ -1965,7 +1974,7 @@ int md_bitmap_load(struct mddev *mddev)
/* Kick recovery in case any bits were set */
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
md_wakeup_thread(mddev->thread);
md_bitmap_update_sb(bitmap);
@ -2470,17 +2479,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
timeout = MAX_SCHEDULE_TIMEOUT-1;
if (timeout < 1)
timeout = 1;
mddev->bitmap_info.daemon_sleep = timeout;
if (mddev->thread) {
/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
* the bitmap is all clean and we don't need to
* adjust the timeout right now
*/
if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT)
mddev->thread->timeout = timeout;
}
mddev->bitmap_info.daemon_sleep = timeout;
mddev_set_timeout(mddev, timeout, false);
md_wakeup_thread(mddev->thread);
return len;
}