btrfs: introduce offload_csum_mode to tweak checksum offloading behavior

We disable offloading checksum to workqueues and do it synchronously when
the checksum algorithm is fast. However, as reported in the link below,
RAID0 with multiple devices may suffer from the sync checksum, because
"fast checksum" is still not fast enough to catch up with RAID0 writing.

We don't have an effective way to determine whether to offload or not,
for now add a sysfs knob so this can be debugged. This is intentionally
under CONFIG_BTRFS_DEBUG so ti's not exposed to users as it may be
removed in the future agin.

Introduce fs_devices->offload_csum_mode, so that a btrfs developer can
change the behavior by writing to /sys/fs/btrfs/<uuid>/offload_csum. The
default is "auto" which is the same as the previous behavior. Or, you
can set "on" or "off" (or "y" or "n" whatever kstrtobool() accepts) to
always/never offload checksum.

More benchmark need to be collected with this knob to implement a proper
criteria to enable/disable checksum offloading.

Link: https://lore.kernel.org/linux-btrfs/20230731152223.4EFB.409509F4@e16-tech.com/
Link: https://lore.kernel.org/linux-btrfs/p3vo3g7pqn664mhmdhlotu5dzcna6vjtcoc2hb2lsgo2fwct7k@xzaxclba5tae/
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Naohiro Aota 2024-02-05 22:01:16 +09:00 committed by David Sterba
parent b2324e08b8
commit 2761ece893
3 changed files with 81 additions and 1 deletions

View File

@ -608,8 +608,20 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
static bool should_async_write(struct btrfs_bio *bbio)
{
bool auto_csum_mode = true;
#ifdef CONFIG_BTRFS_DEBUG
struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_OFF)
return false;
auto_csum_mode = (csum_mode == BTRFS_OFFLOAD_CSUM_AUTO);
#endif
/* Submit synchronously if the checksum implementation is fast. */
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
if (auto_csum_mode && test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*

View File

@ -1307,6 +1307,47 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
btrfs_bg_reclaim_threshold_store);
#ifdef CONFIG_BTRFS_DEBUG
static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
switch (READ_ONCE(fs_devices->offload_csum_mode)) {
case BTRFS_OFFLOAD_CSUM_AUTO:
return sysfs_emit(buf, "auto\n");
case BTRFS_OFFLOAD_CSUM_FORCE_ON:
return sysfs_emit(buf, "1\n");
case BTRFS_OFFLOAD_CSUM_FORCE_OFF:
return sysfs_emit(buf, "0\n");
default:
WARN_ON(1);
return -EINVAL;
}
}
static ssize_t btrfs_offload_csum_store(struct kobject *kobj,
struct kobj_attribute *a, const char *buf,
size_t len)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
int ret;
bool val;
ret = kstrtobool(buf, &val);
if (ret == 0)
WRITE_ONCE(fs_devices->offload_csum_mode,
val ? BTRFS_OFFLOAD_CSUM_FORCE_ON : BTRFS_OFFLOAD_CSUM_FORCE_OFF);
else if (ret == -EINVAL && sysfs_streq(buf, "auto"))
WRITE_ONCE(fs_devices->offload_csum_mode, BTRFS_OFFLOAD_CSUM_AUTO);
else
return -EINVAL;
return len;
}
BTRFS_ATTR_RW(, offload_csum, btrfs_offload_csum_show, btrfs_offload_csum_store);
#endif
/*
* Per-filesystem information and stats.
*
@ -1326,6 +1367,9 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, bg_reclaim_threshold),
BTRFS_ATTR_PTR(, commit_stats),
BTRFS_ATTR_PTR(, temp_fsid),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_ATTR_PTR(, offload_csum),
#endif
NULL,
};

View File

@ -291,6 +291,25 @@ enum btrfs_read_policy {
BTRFS_NR_READ_POLICY,
};
#ifdef CONFIG_BTRFS_DEBUG
/*
* Checksum mode - offload it to workqueues or do it synchronously in
* btrfs_submit_chunk().
*/
enum btrfs_offload_csum_mode {
/*
* Choose offloading checksum or do it synchronously automatically.
* Do it synchronously if the checksum is fast, or offload to workqueues
* otherwise.
*/
BTRFS_OFFLOAD_CSUM_AUTO,
/* Always offload checksum to workqueues. */
BTRFS_OFFLOAD_CSUM_FORCE_ON,
/* Never offload checksum to workqueues. */
BTRFS_OFFLOAD_CSUM_FORCE_OFF,
};
#endif
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
@ -395,6 +414,11 @@ struct btrfs_fs_devices {
/* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
#ifdef CONFIG_BTRFS_DEBUG
/* Checksum mode - offload it or do it synchronously. */
enum btrfs_offload_csum_mode offload_csum_mode;
#endif
};
#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \