Various bug fixes for ext4 fast commit and inline data handling. Also
fix regression introduced as part of moving to the new mount API.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmH7/AUACgkQ8vlZVpUN
 gaOsuQf/TFH8QNBSeEkT5ybnrS51KGTv88mdUVMcsmSMhmAFxiGJLFtMLFu9LG7b
 bJYCg+Q9Rieb1qqqtGNyLe4p3ewShSzBFu8p7hzKMfu0EEcrJwTYVywSX0oYhMMm
 9o+V6CPcGYVZtImihdsmDvgMRRkzoevHQFx+OLhkaq4Qd9ZEdohchYIhRFNXwd+w
 CJiL0TFAnrb4QfWgtq3HyY7aoQumf8YI15C+RTfykzCBhZRFRKXjVXPdIjfGe4O2
 Fpjr4gSsgYK0Er0LLJvESeFFVpFz+NV7q9W/Vj5ahaKJDpiVGzL/OPZsnafzHPPy
 CSa+iP3ZLcTb+KRTOZ1mgjvS34Cmyw==
 =DpdZ
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Various bug fixes for ext4 fast commit and inline data handling.

  Also fix regression introduced as part of moving to the new mount API"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs/ext4: fix comments mentioning i_mutex
  ext4: fix incorrect type issue during replay_del_range
  jbd2: fix kernel-doc descriptions for jbd2_journal_shrink_{scan,count}()
  ext4: fix potential NULL pointer dereference in ext4_fill_super()
  jbd2: refactor wait logic for transaction updates into a common function
  jbd2: cleanup unused functions declarations from jbd2.h
  ext4: fix error handling in ext4_fc_record_modified_inode()
  ext4: remove redundant max inline_size check in ext4_da_write_inline_data_begin()
  ext4: fix error handling in ext4_restore_inline_data()
  ext4: fast commit may miss file actions
  ext4: fast commit may not fallback for ineligible commit
  ext4: modify the logic of ext4_mb_new_blocks_simple
  ext4: prevent used blocks from being allocated during fast commit replay
Linus Torvalds 2022-02-06 10:34:45 -08:00
commit d8ad2ce873
19 changed files with 196 additions and 160 deletions

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
@@ -139,7 +139,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
 /*
  * Inode operation get_posix_acl().
  *
- * inode->i_mutex: don't care
+ * inode->i_rwsem: don't care
  */
 struct posix_acl *
 ext4_get_acl(struct inode *inode, int type, bool rcu)
@@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu)
 /*
  * Set the access or default ACL of an inode.
  *
- * inode->i_mutex: down unless called from ext4_new_inode
+ * inode->i_rwsem: down unless called from ext4_new_inode
  */
 static int
 __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
@@ -271,8 +271,8 @@ ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
 /*
  * Initialize the ACLs of a new inode. Called from ext4_new_inode.
  *
- * dir->i_mutex: down
- * inode->i_mutex: up (access to inode is still exclusive)
+ * dir->i_rwsem: down
+ * inode->i_rwsem: up (access to inode is still exclusive)
  */
 int
 ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
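
Note: the comment-only hunks in this file (and the similar ones below) catch stale references to inode->i_mutex, which the VFS replaced with inode->i_rwsem around v4.5. For context, the lock is taken through the VFS helpers; a simplified sketch of those helpers (condensed from include/linux/fs.h, not part of this commit):

	/* simplified from include/linux/fs.h */
	static inline void inode_lock(struct inode *inode)
	{
		down_write(&inode->i_rwsem);
	}

	static inline void inode_unlock(struct inode *inode)
	{
		up_write(&inode->i_rwsem);
	}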

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
@@ -1028,7 +1028,7 @@ struct ext4_inode_info {
 	/*
 	 * Extended attributes can be read independently of the main file
-	 * data. Taking i_mutex even when reading would cause contention
+	 * data. Taking i_rwsem even when reading would cause contention
 	 * between readers of EAs and writers of regular file data, so
 	 * instead we synchronize on xattr_sem when reading or changing
 	 * EAs.
 	 *
@@ -1750,6 +1750,7 @@ struct ext4_sb_info {
 	spinlock_t s_fc_lock;
 	struct buffer_head *s_fc_bh;
 	struct ext4_fc_stats s_fc_stats;
+	tid_t s_fc_ineligible_tid;
 #ifdef CONFIG_EXT4_DEBUG
 	int s_fc_debug_max_replay;
 #endif
@@ -1795,10 +1796,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 enum {
 	EXT4_MF_MNTDIR_SAMPLED,
 	EXT4_MF_FS_ABORTED,	/* Fatal error detected */
-	EXT4_MF_FC_INELIGIBLE,	/* Fast commit ineligible */
-	EXT4_MF_FC_COMMITTING	/* File system underoing a fast
-				 * commit.
-				 */
+	EXT4_MF_FC_INELIGIBLE	/* Fast commit ineligible */
 };
 
 static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
@@ -2926,7 +2924,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
 			    struct dentry *dentry);
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
 void ext4_fc_start_update(struct inode *inode);
 void ext4_fc_stop_update(struct inode *inode);
 void ext4_fc_del(struct inode *inode);
@@ -2935,6 +2933,9 @@ void ext4_fc_replay_cleanup(struct super_block *sb);
 int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
 int __init ext4_fc_init_dentry_cache(void);
 void ext4_fc_destroy_dentry_cache(void);
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+			   ext4_lblk_t lblk, ext4_fsblk_t pblk,
+			   int len, int replay);
 
 /* mballoc.c */
 extern const struct seq_operations ext4_mb_seq_groups_ops;
@@ -3407,7 +3408,7 @@ do { \
 #define EXT4_FREECLUSTERS_WATERMARK 0
 #endif
 
-/* Update i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_disksize. Requires i_rwsem to avoid races with truncate */
 static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 {
 	WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
@@ -3418,7 +3419,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
 	up_write(&EXT4_I(inode)->i_data_sem);
 }
 
-/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
+/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */
 static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 {
 	int changed = 0;
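
Note: the new s_fc_ineligible_tid field above is the heart of the "fast commit may not fallback for ineligible commit" fix, implemented in the fast_commit.c hunks below. A hedged sketch of the rule it encodes (illustrative names, not kernel code, and ignoring tid wraparound, which the kernel handles with care):

	/* Sketch only: commits up to and including the recorded ineligible
	 * tid must fall back to a full jbd2 commit; the flag may be
	 * cleared only once that tid has actually been committed. */
	typedef unsigned int tid_t;

	static void fc_cleanup_tid_check(tid_t committed_tid,
					 tid_t *fc_ineligible_tid,
					 int *fc_ineligible)
	{
		if (committed_tid >= *fc_ineligible_tid) {
			*fc_ineligible_tid = 0;
			*fc_ineligible = 0;	/* fast commits allowed again */
		}
	}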

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
@@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
 /*
  * This function controls whether or not we should try to go down the
  * dioread_nolock code paths, which makes it safe to avoid taking
- * i_mutex for direct I/O reads. This only works for extent-based
+ * i_rwsem for direct I/O reads. This only works for extent-based
  * files, and it doesn't work if data journaling is enabled, since the
  * dioread_nolock code uses b_private to pass information back to the
  * I/O completion handler, and this conflicts with the jbd's use of

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
@@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
 	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
 	 * moment, get_block can be called only for blocks inside i_size since
 	 * page cache has been already dropped and writes are blocked by
-	 * i_mutex. So we can safely drop the i_data_sem here.
+	 * i_rwsem. So we can safely drop the i_data_sem here.
 	 */
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	ext4_discard_preallocations(inode, 0);
@@ -4572,7 +4572,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	/* Wait all existing dio workers, newcomers will block on i_rwsem */
 	inode_dio_wait(inode);
 
 	/* Preallocate the range including the unaligned edges */
@@ -4738,7 +4738,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		goto out;
 	}
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	/* Wait all existing dio workers, newcomers will block on i_rwsem */
 	inode_dio_wait(inode);
 
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
@@ -5334,7 +5334,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		ret = PTR_ERR(handle);
 		goto out_mmap;
 	}
-	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode, 0);
@@ -5474,7 +5474,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		ret = PTR_ERR(handle);
 		goto out_mmap;
 	}
-	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
 	/* Expand file to avoid data loss if there is error while shifting */
 	inode->i_size += len;
@@ -5571,7 +5571,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
  * stuff such as page-cache locking consistency, bh mapping consistency or
  * extent's data copying must be performed by caller.
  * Locking:
- *		i_mutex is held for both inodes
+ *		i_rwsem is held for both inodes
  *		i_data_sem is locked for write for both inodes
  * Assumptions:
  *		All pages from requested range are locked for both inodes
@@ -6091,11 +6091,15 @@ int ext4_ext_clear_bb(struct inode *inode)
 
 					ext4_mb_mark_bb(inode->i_sb,
 							path[j].p_block, 1, 0);
+					ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+							0, path[j].p_block, 1, 1);
 				}
 				ext4_ext_drop_refs(path);
 				kfree(path);
 			}
 			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
+			ext4_fc_record_regions(inode->i_sb, inode->i_ino,
+					map.m_lblk, map.m_pblk, map.m_len, 1);
 		}
 		cur = cur + map.m_len;
 	}

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
@@ -300,18 +300,32 @@ void ext4_fc_del(struct inode *inode)
 }
 
 /*
- * Mark file system as fast commit ineligible. This means that next commit
- * operation would result in a full jbd2 commit.
+ * Mark file system as fast commit ineligible, and record latest
+ * ineligible transaction tid. This means until the recorded
+ * transaction, commit operation would result in a full jbd2 commit.
  */
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	tid_t tid;
 
 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
 		return;
 
 	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+	if (handle && !IS_ERR(handle))
+		tid = handle->h_transaction->t_tid;
+	else {
+		read_lock(&sbi->s_journal->j_state_lock);
+		tid = sbi->s_journal->j_running_transaction ?
+				sbi->s_journal->j_running_transaction->t_tid : 0;
+		read_unlock(&sbi->s_journal->j_state_lock);
+	}
+	spin_lock(&sbi->s_fc_lock);
+	if (sbi->s_fc_ineligible_tid < tid)
+		sbi->s_fc_ineligible_tid = tid;
+	spin_unlock(&sbi->s_fc_lock);
 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 }
@@ -361,7 +375,8 @@ static int ext4_fc_track_template(
 	spin_lock(&sbi->s_fc_lock);
 	if (list_empty(&EXT4_I(inode)->i_fc_list))
 		list_add_tail(&EXT4_I(inode)->i_fc_list,
-				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
+				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
 				&sbi->s_fc_q[FC_Q_STAGING] :
 				&sbi->s_fc_q[FC_Q_MAIN]);
 	spin_unlock(&sbi->s_fc_lock);
@@ -387,7 +402,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 	mutex_unlock(&ei->i_fc_lock);
 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
 	if (!node) {
-		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
 		mutex_lock(&ei->i_fc_lock);
 		return -ENOMEM;
 	}
@@ -400,7 +415,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 		if (!node->fcd_name.name) {
 			kmem_cache_free(ext4_fc_dentry_cachep, node);
 			ext4_fc_mark_ineligible(inode->i_sb,
-				EXT4_FC_REASON_NOMEM);
+					EXT4_FC_REASON_NOMEM, NULL);
 			mutex_lock(&ei->i_fc_lock);
 			return -ENOMEM;
 		}
@@ -414,7 +429,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 		node->fcd_name.len = dentry->d_name.len;
 
 	spin_lock(&sbi->s_fc_lock);
-	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
+	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
+	    sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
 		list_add_tail(&node->fcd_list,
 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
 	else
@@ -502,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 
 	if (ext4_should_journal_data(inode)) {
 		ext4_fc_mark_ineligible(inode->i_sb,
-					EXT4_FC_REASON_INODE_JOURNAL_DATA);
+					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
 		return;
 	}
 
@@ -879,7 +895,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
 	int ret = 0;
 
 	spin_lock(&sbi->s_fc_lock);
-	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
 	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
 		while (atomic_read(&ei->i_fc_updates)) {
@@ -1179,7 +1194,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
  * Fast commit cleanup routine. This is called after every fast commit and
  * full commit. full is true if we are called after a full commit.
  */
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 {
 	struct super_block *sb = journal->j_private;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1197,7 +1212,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
 		list_del_init(&iter->i_fc_list);
 		ext4_clear_inode_state(&iter->vfs_inode,
 				       EXT4_STATE_FC_COMMITTING);
-		ext4_fc_reset_inode(&iter->vfs_inode);
+		if (iter->i_sync_tid <= tid)
+			ext4_fc_reset_inode(&iter->vfs_inode);
 		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
 		smp_mb();
 #if (BITS_PER_LONG < 64)
@@ -1226,8 +1242,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
 		list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
 				 &sbi->s_fc_q[FC_Q_MAIN]);
 
-	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
-	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+	if (tid >= sbi->s_fc_ineligible_tid) {
+		sbi->s_fc_ineligible_tid = 0;
+		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+	}
 
 	if (full)
 		sbi->s_fc_bytes = 0;
@@ -1392,14 +1410,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
 		if (state->fc_modified_inodes[i] == ino)
 			return 0;
 	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
-		state->fc_modified_inodes_size +=
-			EXT4_FC_REPLAY_REALLOC_INCREMENT;
 		state->fc_modified_inodes = krealloc(
-					state->fc_modified_inodes, sizeof(int) *
-					state->fc_modified_inodes_size,
-					GFP_KERNEL);
+				state->fc_modified_inodes,
+				sizeof(int) * (state->fc_modified_inodes_size +
+				EXT4_FC_REPLAY_REALLOC_INCREMENT),
+				GFP_KERNEL);
 		if (!state->fc_modified_inodes)
 			return -ENOMEM;
+		state->fc_modified_inodes_size +=
+			EXT4_FC_REPLAY_REALLOC_INCREMENT;
 	}
 	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
 	return 0;
@@ -1431,7 +1450,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
 	}
 	inode = NULL;
 
-	ext4_fc_record_modified_inode(sb, ino);
+	ret = ext4_fc_record_modified_inode(sb, ino);
+	if (ret)
+		goto out;
 
 	raw_fc_inode = (struct ext4_inode *)
 		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
@@ -1563,16 +1584,23 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
 }
 
 /*
- * Record physical disk regions which are in use as per fast commit area. Our
- * simple replay phase allocator excludes these regions from allocation.
+ * Record physical disk regions which are in use as per fast commit area,
+ * and used by inodes during replay phase. Our simple replay phase
+ * allocator excludes these regions from allocation.
  */
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
-		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
 {
 	struct ext4_fc_replay_state *state;
 	struct ext4_fc_alloc_region *region;
 
 	state = &EXT4_SB(sb)->s_fc_replay_state;
+	/*
+	 * during replay phase, the fc_regions_valid may not same as
+	 * fc_regions_used, update it when do new additions.
+	 */
+	if (replay && state->fc_regions_used != state->fc_regions_valid)
+		state->fc_regions_used = state->fc_regions_valid;
+
 	if (state->fc_regions_used == state->fc_regions_size) {
 		state->fc_regions_size +=
 			EXT4_FC_REPLAY_REALLOC_INCREMENT;
@@ -1590,6 +1618,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
 	region->pblk = pblk;
 	region->len = len;
 
+	if (replay)
+		state->fc_regions_valid++;
+
 	return 0;
 }
 
@@ -1621,6 +1652,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 	}
 
 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+	if (ret)
+		goto out;
 
 	start = le32_to_cpu(ex->ee_block);
 	start_pblk = ext4_ext_pblock(ex);
@@ -1638,18 +1671,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 		map.m_pblk = 0;
 		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0) {
-			iput(inode);
-			return 0;
-		}
+		if (ret < 0)
+			goto out;
 
 		if (ret == 0) {
 			/* Range is not mapped */
 			path = ext4_find_extent(inode, cur, NULL, 0);
-			if (IS_ERR(path)) {
-				iput(inode);
-				return 0;
-			}
+			if (IS_ERR(path))
+				goto out;
 			memset(&newex, 0, sizeof(newex));
 			newex.ee_block = cpu_to_le32(cur);
 			ext4_ext_store_pblock(
@@ -1663,10 +1692,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 			up_write((&EXT4_I(inode)->i_data_sem));
 			ext4_ext_drop_refs(path);
 			kfree(path);
-			if (ret) {
-				iput(inode);
-				return 0;
-			}
+			if (ret)
+				goto out;
 			goto next;
 		}
@@ -1679,10 +1706,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
 					ext4_ext_is_unwritten(ex),
 					start_pblk + cur - start);
-		if (ret) {
-			iput(inode);
-			return 0;
-		}
+		if (ret)
+			goto out;
 		/*
 		 * Mark the old blocks as free since they aren't used
 		 * anymore. We maintain an array of all the modified
@@ -1702,10 +1727,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 					ext4_ext_is_unwritten(ex), map.m_pblk);
 		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
 					ext4_ext_is_unwritten(ex), map.m_pblk);
-		if (ret) {
-			iput(inode);
-			return 0;
-		}
+		if (ret)
+			goto out;
 		/*
 		 * We may have split the extent tree while toggling the state.
 		 * Try to shrink the extent tree now.
@@ -1717,6 +1740,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 	}
 	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
 					sb->s_blocksize_bits);
+out:
 	iput(inode);
 	return 0;
 }
@@ -1746,6 +1770,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
 	}
 
 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
+	if (ret)
+		goto out;
 
 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
 			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
@@ -1755,10 +1781,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
 		map.m_len = remaining;
 
 		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret < 0) {
-			iput(inode);
-			return 0;
-		}
+		if (ret < 0)
+			goto out;
 		if (ret > 0) {
 			remaining -= ret;
 			cur += ret;
@@ -1770,18 +1794,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
 	}
 
 	down_write(&EXT4_I(inode)->i_data_sem);
-	ret = ext4_ext_remove_space(inode, lrange.fc_lblk,
-				lrange.fc_lblk + lrange.fc_len - 1);
+	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
+				le32_to_cpu(lrange.fc_lblk) +
+				le32_to_cpu(lrange.fc_len) - 1);
 	up_write(&EXT4_I(inode)->i_data_sem);
-	if (ret) {
-		iput(inode);
-		return 0;
-	}
+	if (ret)
+		goto out;
 	ext4_ext_replay_shrink_inode(inode,
 			i_size_read(inode) >> sb->s_blocksize_bits);
 	ext4_mark_inode_dirty(NULL, inode);
+out:
 	iput(inode);
 	return 0;
 }
@@ -1937,7 +1960,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
 			ret = ext4_fc_record_regions(sb,
 					le32_to_cpu(ext.fc_ino),
 					le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
-					ext4_ext_get_actual_len(ex));
+					ext4_ext_get_actual_len(ex), 0);
 			if (ret < 0)
 				break;
 			ret = JBD2_FC_REPLAY_CONTINUE;
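
Note: one pattern worth calling out in ext4_fc_record_modified_inode() above is the reordering around krealloc(): grow the array first, and bump the recorded capacity only after the allocation succeeds, so a failed krealloc() no longer leaves the size field claiming space that was never allocated. A standalone sketch of the same idiom using plain realloc() (grow_int_array and its parameters are illustrative, not kernel APIs):

	#include <stdlib.h>

	static int grow_int_array(int **arr, int *size, int increment)
	{
		int *tmp = realloc(*arr, sizeof(int) * (*size + increment));

		if (!tmp)
			return -1;	/* old array untouched, *size unchanged */
		*arr = tmp;
		*size += increment;	/* update capacity only on success */
		return 0;
	}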

diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
 	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
 	 * moment, get_block can be called only for blocks inside i_size since
 	 * page cache has been already dropped and writes are blocked by
-	 * i_mutex. So we can safely drop the i_data_sem here.
+	 * i_rwsem. So we can safely drop the i_data_sem here.
 	 */
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	ext4_discard_preallocations(inode, 0);

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
@@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 				    struct page **pagep,
 				    void **fsdata)
 {
-	int ret, inline_size;
+	int ret;
 	handle_t *handle;
 	struct page *page;
 	struct ext4_iloc iloc;
@@ -928,14 +928,9 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 		goto out;
 	}
 
-	inline_size = ext4_get_max_inline_size(inode);
-
-	ret = -ENOSPC;
-	if (inline_size >= pos + len) {
-		ret = ext4_prepare_inline_data(handle, inode, pos + len);
-		if (ret && ret != -ENOSPC)
-			goto out_journal;
-	}
+	ret = ext4_prepare_inline_data(handle, inode, pos + len);
+	if (ret && ret != -ENOSPC)
+		goto out_journal;
 
 	/*
 	 * We cannot recurse into the filesystem as the transaction
@@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
 					struct ext4_iloc *iloc,
 					void *buf, int inline_size)
 {
-	ext4_create_inline_data(handle, inode, inline_size);
+	int ret;
+
+	ret = ext4_create_inline_data(handle, inode, inline_size);
+	if (ret) {
+		ext4_msg(inode->i_sb, KERN_EMERG,
+			"error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
+			inode->i_ino, ret);
+		return;
+	}
 	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
 	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
 }

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
@@ -338,7 +338,7 @@ void ext4_evict_inode(struct inode *inode)
 	return;
 no_delete:
 	if (!list_empty(&EXT4_I(inode)->i_fc_list))
-		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
 	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
@@ -1224,7 +1224,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
 		/*
 		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
-		 * i_size_read because we hold i_mutex.
+		 * i_size_read because we hold i_rwsem.
 		 *
 		 * Add inode to orphan list in case we crash before
 		 * truncate finishes
@@ -3979,7 +3979,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	}
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	/* Wait all existing dio workers, newcomers will block on i_rwsem */
 	inode_dio_wait(inode);
 
 	/*
@@ -4129,7 +4129,7 @@ int ext4_truncate(struct inode *inode)
 	/*
 	 * There is a possibility that we're either freeing the inode
 	 * or it's a completely new inode. In those cases we might not
-	 * have i_mutex locked because it's not necessary.
+	 * have i_rwsem locked because it's not necessary.
 	 */
 	if (!(inode->i_state & (I_NEW|I_FREEING)))
 		WARN_ON(!inode_is_locked(inode));
@@ -5271,7 +5271,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
 * transaction are already on disk (truncate waits for pages under
 * writeback).
 *
- * Called with inode->i_mutex down.
+ * Called with inode->i_rwsem down.
 */
 int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		 struct iattr *attr)
@@ -5983,7 +5983,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 		return PTR_ERR(handle);
 
 	ext4_fc_mark_ineligible(inode->i_sb,
-		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
 	err = ext4_mark_inode_dirty(handle, inode);
 	ext4_handle_sync(handle);
 	ext4_journal_stop(handle);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
 		err = -EINVAL;
 		goto err_out;
 	}
-	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
 
 	/* Protect extent tree against block allocations via delalloc */
 	ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1373,7 +1373,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = ext4_resize_fs(sb, n_blocks_count);
 		if (EXT4_SB(sb)->s_journal) {
-			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
 			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
 			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
 			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
@@ -5753,7 +5753,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
 	struct super_block *sb = ar->inode->i_sb;
 	ext4_group_t group;
 	ext4_grpblk_t blkoff;
-	int i = sb->s_blocksize;
+	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+	ext4_grpblk_t i = 0;
 	ext4_fsblk_t goal, block;
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
@@ -5775,19 +5776,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
 		ext4_get_group_no_and_offset(sb,
 			max(ext4_group_first_block_no(sb, group), goal),
 			NULL, &blkoff);
-		i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize,
-					  blkoff);
+		while (1) {
+			i = mb_find_next_zero_bit(bitmap_bh->b_data, max,
+						  blkoff);
+			if (i >= max)
+				break;
+			if (ext4_fc_replay_check_excluded(sb,
+				ext4_group_first_block_no(sb, group) + i)) {
+				blkoff = i + 1;
+			} else
+				break;
+		}
 		brelse(bitmap_bh);
-		if (i >= sb->s_blocksize)
-			continue;
-		if (ext4_fc_replay_check_excluded(sb,
-			ext4_group_first_block_no(sb, group) + i))
-			continue;
-		break;
+		if (i < max)
+			break;
 	}
 
-	if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize)
+	if (group >= ext4_get_groups_count(sb) || i >= max) {
+		*errp = -ENOSPC;
 		return 0;
+	}
 
 	block = ext4_group_first_block_no(sb, group) + i;
 	ext4_mb_mark_bb(sb, block, 1, 1);
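
Note: the crucial details above are the scan bound and the retry loop. mb_find_next_zero_bit() must be bounded by EXT4_CLUSTERS_PER_GROUP(sb) (a bit count), not sb->s_blocksize (a byte count), and a return value >= the bound means "no free bit in this group". An illustrative stand-alone scanner with the same "return max when nothing found" contract (find_next_zero_bit_simple is a made-up name, not the kernel helper):

	/* Scan for the first 0 bit in [start, max); return max if none. */
	static int find_next_zero_bit_simple(const unsigned char *bitmap,
					     int max, int start)
	{
		int i;

		for (i = start; i < max; i++)
			if (!(bitmap[i / 8] & (1 << (i % 8))))
				return i;
		return max;
	}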

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
@@ -485,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode)
 	 * when we add extents we extent the journal
 	 */
 	/*
-	 * Even though we take i_mutex we can still cause block
+	 * Even though we take i_rwsem we can still cause block
 	 * allocation via mmap write to holes. If we have allocated
 	 * new blocks we fail migrate. New block allocation will
 	 * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 		 * dirents in directories.
 		 */
 		ext4_fc_mark_ineligible(old.inode->i_sb,
-			EXT4_FC_REASON_RENAME_DIR);
+			EXT4_FC_REASON_RENAME_DIR, handle);
 	} else {
 		if (new.inode)
 			ext4_fc_track_unlink(handle, new.dentry);
@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (unlikely(retval))
 		goto end_rename;
 	ext4_fc_mark_ineligible(new.inode->i_sb,
-				EXT4_FC_REASON_CROSS_RENAME);
+				EXT4_FC_REASON_CROSS_RENAME, handle);
 	if (old.dir_bh) {
 		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
 		if (retval)

diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
@@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
 * At filesystem recovery time, we walk this list deleting unlinked
 * inodes and truncating linked inodes in ext4_orphan_cleanup().
 *
- * Orphan list manipulation functions must be called under i_mutex unless
+ * Orphan list manipulation functions must be called under i_rwsem unless
 * we are just creating the inode or deleting it.
 */
 int ext4_orphan_add(handle_t *handle, struct inode *inode)
@@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	/*
 	 * Orphan handling is only valid for files with data blocks
 	 * being truncated, or files being unlinked. Note that we either
-	 * hold i_mutex, or the inode can not be referenced from outside,
+	 * hold i_rwsem, or the inode can not be referenced from outside,
 	 * so i_nlink should not be bumped due to race
 	 */
 	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
@@ -5082,7 +5082,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
 	sbi->s_fc_bytes = 0;
 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
-	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+	sbi->s_fc_ineligible_tid = 0;
 	spin_lock_init(&sbi->s_fc_lock);
 	memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
 	sbi->s_fc_replay_state.fc_regions = NULL;
@@ -5540,7 +5540,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
 	sbi = ext4_alloc_sbi(sb);
 	if (!sbi)
-		ret = -ENOMEM;
+		return -ENOMEM;
 
 	fc->s_fs_info = sbi;

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
 	}
-	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
 
 cleanup:
 	brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
 		if (error == 0)
 			error = error2;
 	}
-	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
 
 	return error;
 }
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 					 error);
 			goto cleanup;
 		}
-		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
 	}
 	error = 0;
 cleanup:

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
@@ -484,22 +484,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
 					      stats.run.rs_locked);
 
-	spin_lock(&commit_transaction->t_handle_lock);
-	while (atomic_read(&commit_transaction->t_updates)) {
-		DEFINE_WAIT(wait);
-
-		prepare_to_wait(&journal->j_wait_updates, &wait,
-					TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&commit_transaction->t_updates)) {
-			spin_unlock(&commit_transaction->t_handle_lock);
-			write_unlock(&journal->j_state_lock);
-			schedule();
-			write_lock(&journal->j_state_lock);
-			spin_lock(&commit_transaction->t_handle_lock);
-		}
-		finish_wait(&journal->j_wait_updates, &wait);
-	}
-	spin_unlock(&commit_transaction->t_handle_lock);
+	// waits for any t_updates to finish
+	jbd2_journal_wait_updates(journal);
 
 	commit_transaction->t_state = T_SWITCH;
 	write_unlock(&journal->j_state_lock);
@@ -817,7 +804,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	commit_transaction->t_state = T_COMMIT_DFLUSH;
 	write_unlock(&journal->j_state_lock);
 
-	/* 
+	/*
 	 * If the journal is not located on the file system device,
 	 * then we must flush the file system device before we issue
 	 * the commit record
@@ -1170,7 +1157,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	if (journal->j_commit_callback)
 		journal->j_commit_callback(journal, commit_transaction);
 	if (journal->j_fc_cleanup_callback)
-		journal->j_fc_cleanup_callback(journal, 1);
+		journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
 
 	trace_jbd2_end_commit(journal, commit_transaction);
 	jbd_debug(1, "JBD2: commit %d complete, head %d\n",

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
 {
 	jbd2_journal_unlock_updates(journal);
 	if (journal->j_fc_cleanup_callback)
-		journal->j_fc_cleanup_callback(journal, 0);
+		journal->j_fc_cleanup_callback(journal, 0, tid);
 	write_lock(&journal->j_state_lock);
 	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
 	if (fallback)
@@ -1287,6 +1287,8 @@ static int jbd2_min_tag_size(void)
 /**
 * jbd2_journal_shrink_scan()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
 *
 * Scan the checkpointed buffer on the checkpoint list and release the
 * journal_head.
@@ -1312,6 +1314,8 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
 /**
 * jbd2_journal_shrink_count()
+ * @shrink: shrinker to work on
+ * @sc: reclaim request to process
 *
 * Count the number of checkpoint buffers on the checkpoint list.
 */

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
@@ -449,7 +449,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	}
 
 	/* OK, account for the buffers that this operation expects to
-	 * use and add the handle to the running transaction. 
+	 * use and add the handle to the running transaction.
 	 */
 	update_t_max_wait(transaction, ts);
 	handle->h_transaction = transaction;
@@ -836,6 +836,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 }
 EXPORT_SYMBOL(jbd2_journal_restart);
 
+/*
+ * Waits for any outstanding t_updates to finish.
+ * This is called with write j_state_lock held.
+ */
+void jbd2_journal_wait_updates(journal_t *journal)
+{
+	transaction_t *commit_transaction = journal->j_running_transaction;
+
+	if (!commit_transaction)
+		return;
+
+	spin_lock(&commit_transaction->t_handle_lock);
+	while (atomic_read(&commit_transaction->t_updates)) {
+		DEFINE_WAIT(wait);
+
+		prepare_to_wait(&journal->j_wait_updates, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&commit_transaction->t_updates)) {
+			spin_unlock(&commit_transaction->t_handle_lock);
+			write_unlock(&journal->j_state_lock);
+			schedule();
+			write_lock(&journal->j_state_lock);
+			spin_lock(&commit_transaction->t_handle_lock);
+		}
+		finish_wait(&journal->j_wait_updates, &wait);
+	}
+	spin_unlock(&commit_transaction->t_handle_lock);
+}
+
 /**
 * jbd2_journal_lock_updates () - establish a transaction barrier.
 * @journal:  Journal to establish a barrier on.
@@ -863,27 +892,9 @@ void jbd2_journal_lock_updates(journal_t *journal)
 		write_lock(&journal->j_state_lock);
 	}
 
-	/* Wait until there are no running updates */
-	while (1) {
-		transaction_t *transaction = journal->j_running_transaction;
-
-		if (!transaction)
-			break;
-
-		spin_lock(&transaction->t_handle_lock);
-		prepare_to_wait(&journal->j_wait_updates, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (!atomic_read(&transaction->t_updates)) {
-			spin_unlock(&transaction->t_handle_lock);
-			finish_wait(&journal->j_wait_updates, &wait);
-			break;
-		}
-		spin_unlock(&transaction->t_handle_lock);
-		write_unlock(&journal->j_state_lock);
-		schedule();
-		finish_wait(&journal->j_wait_updates, &wait);
-		write_lock(&journal->j_state_lock);
-	}
+	/* Wait until there are no running t_updates */
+	jbd2_journal_wait_updates(journal);
 
 	write_unlock(&journal->j_state_lock);
 
 	/*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
@@ -594,7 +594,7 @@ struct transaction_s
 	 */
 	unsigned long		t_log_start;
 
-	/* 
+	/*
 	 * Number of buffers on the t_buffers list [j_list_lock, no locks
 	 * needed for jbd2 thread]
 	 */
@@ -1295,7 +1295,7 @@ struct journal_s
 	 * Clean-up after fast commit or full commit. JBD2 calls this function
 	 * after every commit operation.
 	 */
-	void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+	void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
 
 	/**
 	 * @j_fc_replay_callback:
@@ -1419,9 +1419,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
 extern bool __jbd2_journal_refile_buffer(struct journal_head *);
 extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
 extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_free_buffer(struct journal_head *bh);
 extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
-extern void __journal_clean_data_list(transaction_t *transaction);
 static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
 {
 	list_add_tail(&bh->b_assoc_buffers, head);
@@ -1486,9 +1484,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 					      struct buffer_head **bh_out,
 					      sector_t blocknr);
 
-/* Transaction locking */
-extern void __wait_on_journal (journal_t *);
-
 /* Transaction cache support */
 extern void jbd2_journal_destroy_transaction_cache(void);
 extern int __init jbd2_journal_init_transaction_cache(void);
@@ -1543,6 +1538,8 @@ extern int jbd2_journal_flush(journal_t *journal, unsigned int flags);
 extern void	 jbd2_journal_lock_updates (journal_t *);
 extern void	 jbd2_journal_unlock_updates (journal_t *);
 
+void jbd2_journal_wait_updates(journal_t *);
+
 extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 				struct block_device *fs_dev,
 				unsigned long long start, int len, int bsize);
@@ -1774,8 +1771,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
 #define BJ_Reserved	4	/* Buffer is reserved for access by journal */
 #define BJ_Types	5
 
-extern int jbd_blocks_per_page(struct inode *inode);
-
 /* JBD uses a CRC32 checksum */
 #define JBD_MAX_CHECKSUM_SIZE 4