diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ccb30ca9ebb2..0af5ecbda9a3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2471,6 +2471,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; + btrfs_record_snapshot_destroy(trans, dir); + ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, dentry->d_name.name, @@ -2522,8 +2524,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; - if (!err) - btrfs_record_snapshot_destroy(trans, dir); ret = btrfs_end_transaction(trans, root); if (ret && !err) err = ret; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 43c6781af654..9f6372dd0eab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4771,6 +4771,42 @@ out_unlock: return err; } +/* + * Check if we must fallback to a transaction commit when logging an inode. + * This must be called after logging the inode and is used only in the context + * when fsyncing an inode requires the need to log some other inode - in which + * case we can't lock the i_mutex of each other inode we need to log as that + * can lead to deadlocks with concurrent fsync against other inodes (as we can + * log inodes up or down in the hierarchy) or rename operations for example. So + * we take the log_mutex of the inode after we have logged it and then check for + * its last_unlink_trans value - this is safe because any task setting + * last_unlink_trans must take the log_mutex and it must do this before it does + * the actual unlink operation, so if we do this check before a concurrent task + * sets last_unlink_trans it means we've logged a consistent version/state of + * all the inode items, otherwise we are not sure and must do a transaction + * commit (the concurrent task migth have only updated last_unlink_trans before + * we logged the inode or it might have also done the unlink). + */ +static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + bool ret = false; + + mutex_lock(&BTRFS_I(inode)->log_mutex); + if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { + /* + * Make sure any commits to the log are forced to be full + * commits. + */ + btrfs_set_log_full_commit(fs_info, trans); + ret = true; + } + mutex_unlock(&BTRFS_I(inode)->log_mutex); + + return ret; +} + /* * follow the dentry parent pointers up the chain and see if any * of the directories in it require a full commit before they can @@ -4784,7 +4820,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, u64 last_committed) { int ret = 0; - struct btrfs_root *root; struct dentry *old_parent = NULL; struct inode *orig_inode = inode; @@ -4816,14 +4851,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, BTRFS_I(inode)->logged_trans = trans->transid; smp_mb(); - if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - root = BTRFS_I(inode)->root; - - /* - * make sure any commits to the log are forced - * to be full commits - */ - btrfs_set_log_full_commit(root->fs_info, trans); + if (btrfs_must_commit_transaction(trans, inode)) { ret = 1; break; } @@ -4982,6 +5010,9 @@ process_leaf: btrfs_release_path(path); ret = btrfs_log_inode(trans, root, di_inode, log_mode, 0, LLONG_MAX, ctx); + if (!ret && + btrfs_must_commit_transaction(trans, di_inode)) + ret = 1; iput(di_inode); if (ret) goto next_dir_inode; @@ -5096,6 +5127,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, dir_inode, LOG_INODE_ALL, 0, LLONG_MAX, ctx); + if (!ret && + btrfs_must_commit_transaction(trans, dir_inode)) + ret = 1; iput(dir_inode); if (ret) goto out; @@ -5447,6 +5481,9 @@ error: * They revolve around files there were unlinked from the directory, and * this function updates the parent directory so that a full commit is * properly done if it is fsync'd later after the unlinks are done. + * + * Must be called before the unlink operations (updates to the subvolume tree, + * inodes, etc) are done. */ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, struct inode *dir, struct inode *inode, @@ -5462,8 +5499,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * into the file. When the file is logged we check it and * don't log the parents if the file is fully on disk. */ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode)) { + mutex_lock(&BTRFS_I(inode)->log_mutex); BTRFS_I(inode)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(inode)->log_mutex); + } /* * if this directory was already logged any new @@ -5494,7 +5534,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, return; record: + mutex_lock(&BTRFS_I(dir)->log_mutex); BTRFS_I(dir)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(dir)->log_mutex); } /* @@ -5505,11 +5547,16 @@ record: * corresponding to the deleted snapshot's root, which could lead to replaying * it after replaying the log tree of the parent directory (which would replay * the snapshot delete operation). + * + * Must be called before the actual snapshot destroy operation (updates to the + * parent root and tree of tree roots trees, etc) are done. */ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, struct inode *dir) { + mutex_lock(&BTRFS_I(dir)->log_mutex); BTRFS_I(dir)->last_unlink_trans = trans->transid; + mutex_unlock(&BTRFS_I(dir)->log_mutex); } /*