Many ext4 and jbd2 cleanups and bug fixes for v6.6-rc1.

* Cleanups in the ext4 remount code when going to and from read-only
* Cleanups in ext4's multiblock allocator
* Cleanups in the jbd2 setup/mounting code paths
* Performance improvements when appending to a delayed allocation file
* Miscellaneous syzbot and other bug fixes
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmTwqUMACgkQ8vlZVpUN
 gaMqgwf6Aui6MlrtNJx6CrJt4dxLANQ8G6bsJ2Zr+6QNS1X/GAUrCCyLWWom1dfb
 OJ/n4/JUCNc9v5yLCTqHOE5ZFTdQItOBJUKXbJYff8EdnR+zCUULpj6bPbEs5BKp
 U7CiiZ9TIi9S2TWezvIJKIa2VxgPej7CH/HOt8ISh/Msq8nHvcEEJIyOEvVk9odQ
 LEkiQCsikWaljB7qEOIYo+xgFffMZfttc4zuTkdr/h1I6OWhvQYmlwSnTuAiE7BS
 BVf3ebD2Dg8TChUMXOsk2d743iZNWf/+yTfbXVu93/uEM9vgF0+HO6EerTK8RMeM
 yxhshg9z7ccuFjdY/2NYDXe6pEuDKw==
 =cMIX
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Many ext4 and jbd2 cleanups and bug fixes:

   - Cleanups in the ext4 remount code when going to and from read-only

   - Cleanups in ext4's multiblock allocator

   - Cleanups in the jbd2 setup/mounting code paths

   - Performance improvements when appending to a delayed allocation file

   - Miscellaneous syzbot and other bug fixes"

* tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (60 commits)
  ext4: fix slab-use-after-free in ext4_es_insert_extent()
  libfs: remove redundant checks of s_encoding
  ext4: remove redundant checks of s_encoding
  ext4: reject casefold inode flag without casefold feature
  ext4: use LIST_HEAD() to initialize the list_head in mballoc.c
  ext4: do not mark inode dirty every time when appending using delalloc
  ext4: rename s_error_work to s_sb_upd_work
  ext4: add periodic superblock update check
  ext4: drop dio overwrite only flag and associated warning
  ext4: add correct group descriptors and reserved GDT blocks to system zone
  ext4: remove unused function declaration
  ext4: mballoc: avoid garbage value from err
  ext4: use sbi instead of EXT4_SB(sb) in ext4_mb_new_blocks_simple()
  ext4: change the type of blocksize in ext4_mb_init_cache()
  ext4: fix unattached inode after power cut with orphan file feature enabled
  jbd2: correct the end of the journal recovery scan range
  ext4: ext4_get_{dev}_journal return proper error value
  ext4: cleanup ext4_get_dev_journal() and ext4_get_journal()
  jbd2: jbd2_journal_init_{dev,inode} return proper error return value
  jbd2: drop useless error tag in jbd2_journal_wipe()
  ...
commit 3ef96fcfd5
Linus Torvalds, 2023-08-31 15:18:15 -07:00
26 changed files with 718 additions and 679 deletions


@ -913,11 +913,11 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
}
/*
* This function returns the number of file system metadata clusters at
* This function returns the number of file system metadata blocks at
* the beginning of a block group, including the reserved gdt blocks.
*/
static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group)
unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned num;
@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
} else { /* For META_BG_BLOCK_GROUPS */
num += ext4_bg_num_gdb_meta(sb, block_group);
}
return EXT4_NUM_B2C(sbi, num);
return num;
}
static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group)
{
return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
}
/**
* ext4_inode_to_goal_block - return a hint for block allocation
* @inode: inode for block allocation


@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb)
struct ext4_system_blocks *system_blks;
struct ext4_group_desc *gdp;
ext4_group_t i;
int flex_size = ext4_flex_bg_size(sbi);
int ret;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb)
return -ENOMEM;
for (i=0; i < ngroups; i++) {
unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
cond_resched();
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0))) {
if (meta_blks != 0) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
ext4_bg_num_gdb(sb, i) + 1, 0);
meta_blks, 0);
if (ret)
goto err;
}


@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
if (err)
ext4_fname_free_filename(fname);
#endif
return err;
}
@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
if (err)
ext4_fname_free_filename(fname);
#endif
return err;
}


@ -176,9 +176,6 @@ enum criteria {
EXT4_MB_NUM_CRS
};
/* criteria below which we use fast block scanning and avoid unnecessary IO */
#define CR_FAST CR_GOAL_LEN_SLOW
/*
* Flags used in mballoc's allocation_context flags field.
*
@ -1241,6 +1238,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
* scanning in mballoc
*/
#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
@ -1258,10 +1256,8 @@ struct ext4_inode_info {
#define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
#define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
@ -1708,10 +1704,13 @@ struct ext4_sb_info {
const char *s_last_error_func;
time64_t s_last_error_time;
/*
* If we are in a context where we cannot update error information in
* the on-disk superblock, we queue this work to do it.
* If we are in a context where we cannot update the on-disk
* superblock, we queue the work here. This is used to update
* the error information in the superblock, and for periodic
* updates of the superblock called from the commit callback
* function.
*/
struct work_struct s_error_work;
struct work_struct s_sb_upd_work;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
@ -1804,7 +1803,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
*/
enum {
EXT4_MF_MNTDIR_SAMPLED,
EXT4_MF_FS_ABORTED, /* Fatal error detected */
EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};
@ -2228,9 +2226,9 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly);
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
static inline int ext4_forced_shutdown(struct super_block *sb)
{
return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
}
/*
@ -2708,7 +2706,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
@ -2864,7 +2861,6 @@ extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
@ -2907,7 +2903,6 @@ extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
@ -2930,6 +2925,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state);
static inline bool ext4_mb_cr_expensive(enum criteria cr)
{
return cr >= CR_GOAL_LEN_SLOW;
}
/* inode.c */
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
@ -2983,7 +2982,6 @@ extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
extern int ext4_file_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
@ -3090,6 +3088,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
ext4_group_t block_group);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
@ -3531,8 +3531,6 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
int ext4_readpage_inline(struct inode *inode, struct folio *folio);


@ -67,11 +67,12 @@ static int ext4_journal_check_start(struct super_block *sb)
might_sleep();
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
if (sb_rdonly(sb))
if (WARN_ON_ONCE(sb_rdonly(sb)))
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
/*


@ -878,23 +878,29 @@ retry:
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es1) {
if (!es1->es_len)
__es_free_extent(es1);
es1 = NULL;
}
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
err2 = 0;
if (err2 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es2) {
if (!es2->es_len)
__es_free_extent(es2);
es2 = NULL;
}
if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
(status & EXTENT_STATUS_WRITTEN ||
status & EXTENT_STATUS_UNWRITTEN))
__revise_pending(inode, lblk, len);
/* es is pre-allocated but not used, free it. */
if (es1 && !es1->es_len)
__es_free_extent(es1);
if (es2 && !es2->es_len)
__es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
@ -1491,8 +1497,12 @@ retry:
*/
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end, &reserved, es);
if (es && !es->es_len)
__es_free_extent(es);
/* Free preallocated extent if it didn't get used. */
if (es) {
if (!es->es_len)
__es_free_extent(es);
es = NULL;
}
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err)
goto retry;
@ -2047,19 +2057,25 @@ retry:
err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es1) {
if (!es1->es_len)
__es_free_extent(es1);
es1 = NULL;
}
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es2) {
if (!es2->es_len)
__es_free_extent(es2);
es2 = NULL;
}
if (allocated)
__insert_pending(inode, lblk);
/* es is pre-allocated but not used, free it. */
if (es1 && !es1->es_len)
__es_free_extent(es1);
if (es2 && !es2->es_len)
__es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
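
The pattern shared by these hunks: the extent nodes es1/es2 are preallocated before i_es_lock is taken, consumed by __es_remove_extent()/__es_insert_extent(), and, when left unused, freed and cleared on the spot so that neither the error path nor the retry loop can reach a stale pointer. A minimal standalone sketch of that discipline in plain C (all names here are invented for illustration):

#include <stdio.h>
#include <stdlib.h>

struct node { int used; };

/* Stand-in for __es_insert_extent(): may or may not consume the node. */
static int do_insert(struct node *prealloc, int consume)
{
	if (consume)
		prealloc->used = 1;
	return 0;
}

int main(void)
{
	struct node *n = calloc(1, sizeof(*n));

	if (!n)
		return 1;
	/* ...lock would be taken here... */
	do_insert(n, 0);
	if (n && !n->used) {
		/* Free the unused preallocation immediately and clear the
		 * pointer, so no later path can touch freed memory -- the
		 * use-after-free class that the slab-use-after-free fix in
		 * this series addresses. */
		free(n);
		n = NULL;
	}
	/* ...lock would be dropped here... */
	printf("node consumed: %s\n", n ? "yes" : "no");
	free(n); /* free(NULL) is a no-op */
	return 0;
}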


@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (!iov_iter_count(to))
@ -153,7 +153,7 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
{
struct inode *inode = file_inode(in);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
return filemap_splice_read(in, ppos, pipe, len, flags);
}
@ -476,6 +476,11 @@ restart:
* required to change security info in file_modified(), for extending
* I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
* extents (as partial block zeroing may be required).
*
* Note that unaligned writes are allowed under shared lock so long as
* they are pure overwrites. Otherwise, concurrent unaligned writes risk
* data corruption due to partial block zeroing in the dio layer, and so
* the I/O must occur exclusively.
*/
if (*ilock_shared &&
((!IS_NOSEC(inode) || *extend || !overwrite ||
@ -492,21 +497,12 @@ restart:
/*
* Now that locking is settled, determine dio flags and exclusivity
* requirements. Unaligned writes are allowed under shared lock so long
* as they are pure overwrites. Set the iomap overwrite only flag as an
* added precaution in this case. Even though this is unnecessary, we
* can detect and warn on unexpected -EAGAIN if an unsafe unaligned
* write is ever submitted.
*
* Otherwise, concurrent unaligned writes risk data corruption due to
* partial block zeroing in the dio layer, and so the I/O must occur
* exclusively. The inode lock is already held exclusive if the write is
* non-overwrite or extending, so drain all outstanding dio and set the
* force wait dio flag.
* requirements. We don't use DIO_OVERWRITE_ONLY because we enforce
* behavior already. The inode lock is already held exclusive if the
* write is non-overwrite or extending, so drain all outstanding dio and
* set the force wait dio flag.
*/
if (*ilock_shared && unaligned_io) {
*dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
} else if (!*ilock_shared && (unaligned_io || *extend)) {
if (!*ilock_shared && (unaligned_io || *extend)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
dio_flags, NULL, 0);
WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;
@ -709,7 +704,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
#ifdef CONFIG_FS_DAX
@ -806,10 +801,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct dax_device *dax_dev = sbi->s_daxdev;
struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
/*
@ -885,7 +879,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
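
In the dio hunks above, a write counts as "unaligned" when its start offset or its length is not a multiple of the filesystem block size; only pure overwrites of that kind may proceed under the shared inode lock. A hedged sketch of that test (ext4's real helper also folds in the iov_iter alignment):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: unaligned if pos or len has bits below the block
 * size; blocksize is assumed to be a power of two. */
static bool dio_unaligned(unsigned long long pos, unsigned long long len,
			  unsigned int blocksize)
{
	return (pos | len) & (blocksize - 1);
}

int main(void)
{
	printf("%d %d\n",
	       dio_unaligned(4096, 8192, 4096),  /* fully aligned -> 0 */
	       dio_unaligned(4096, 1000, 4096)); /* partial block -> 1 */
	return 0;
}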


@ -131,9 +131,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
bool needs_barrier = false;
struct inode *inode = file->f_mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ASSERT(ext4_journal_current_handle() == NULL);
@ -141,14 +140,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_ext4_sync_file_enter(file, datasync);
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
/* Make sure that we read updated s_ext4_flags value */
smp_rmb();
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(inode->i_sb))
ret = -EROFS;
goto out;
}
if (!sbi->s_journal) {
if (!EXT4_SB(inode->i_sb)->s_journal) {
ret = ext4_fsync_nojournal(file, start, end, datasync,
&needs_barrier);
if (needs_barrier)


@ -300,7 +300,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
if (len && IS_CASEFOLDED(dir) && um &&
if (len && IS_CASEFOLDED(dir) &&
(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) {
buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
if (!buff)


@ -950,7 +950,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
sb = dir->i_sb;
sbi = EXT4_SB(sb);
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(sb)))
return ERR_PTR(-EIO);
ngroups = ext4_get_groups_count(sb);
@ -1523,12 +1523,6 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int num, ret = 0, used_blks = 0;
unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */
if (sb_rdonly(sb)) {
ret = 1;
goto out;
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp || !grp)
goto out;


@ -228,7 +228,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
struct ext4_inode *raw_inode;
int cp_len = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
BUG_ON(!EXT4_I(inode)->i_inline_off);


@ -1114,7 +1114,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
trace_ext4_write_begin(inode, pos, len);
@ -2213,8 +2213,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
if (err < 0) {
struct super_block *sb = inode->i_sb;
if (ext4_forced_shutdown(EXT4_SB(sb)) ||
ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(sb))
goto invalidate_dirty_pages;
/*
* Let the upper layers retry transient errors.
@ -2534,14 +2533,13 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
* EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
* fs shutdown state instead of sb->s_flag's SB_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
ret = -EROFS;
goto out_writepages;
}
@ -2759,7 +2757,7 @@ static int ext4_writepages(struct address_space *mapping,
int ret;
int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(sb);
@ -2798,16 +2796,16 @@ static int ext4_dax_writepages(struct address_space *mapping,
int ret;
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
ret = dax_writeback_mapping_range(mapping,
EXT4_SB(inode->i_sb)->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
@ -2857,7 +2855,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
struct inode *inode = mapping->host;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
index = pos >> PAGE_SHIFT;
@ -2937,14 +2935,73 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
return 1;
}
static int ext4_da_do_write_end(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool disksize_changed = false;
loff_t new_i_size;
/*
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
* flag, which is all that's needed to trigger page writeback.
*/
copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
new_i_size = pos + copied;
/*
* It's important to update i_size while still holding page lock,
* because page writeout could otherwise come in and zero beyond
* i_size.
*
* Since we are holding inode lock, we are sure i_disksize <=
* i_size. We also know that if i_disksize < i_size, there are
* delalloc writes pending in the range up to i_size. If the end of
* the current write is <= i_size, there's no need to touch
* i_disksize since writeback will push i_disksize up to i_size
* eventually. If the end of the current write is > i_size and
* inside an allocated block which ext4_da_should_update_i_disksize()
* checked, we need to update i_disksize here as certain
* ext4_writepages() paths not allocating blocks update i_disksize.
*/
if (new_i_size > inode->i_size) {
unsigned long end;
i_size_write(inode, new_i_size);
end = (new_i_size - 1) & (PAGE_SIZE - 1);
if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
ext4_update_i_disksize(inode, new_i_size);
disksize_changed = true;
}
}
unlock_page(page);
put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
if (disksize_changed) {
handle_t *handle;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
}
return copied;
}
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t new_i_size;
unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
struct folio *folio = page_folio(page);
@ -2963,30 +3020,7 @@ static int ext4_da_write_end(struct file *file,
if (unlikely(copied < len) && !PageUptodate(page))
copied = 0;
start = pos & (PAGE_SIZE - 1);
end = start + copied - 1;
/*
* Since we are holding inode lock, we are sure i_disksize <=
* i_size. We also know that if i_disksize < i_size, there are
* delalloc writes pending in the range up to i_size. If the end of
* the current write is <= i_size, there's no need to touch
* i_disksize since writeback will push i_disksize up to i_size
* eventually. If the end of the current write is > i_size and
* inside an allocated block (ext4_da_should_update_i_disksize()
* check), we need to update i_disksize here as certain
* ext4_writepages() paths not allocating blocks update i_disksize.
*
* Note that we defer inode dirtying to generic_write_end() /
* ext4_da_write_inline_data_end().
*/
new_i_size = pos + copied;
if (copied && new_i_size > inode->i_size &&
ext4_da_should_update_i_disksize(folio, end))
ext4_update_i_disksize(inode, new_i_size);
return generic_write_end(file, mapping, pos, len, copied, &folio->page,
fsdata);
return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
}
/*
@ -4940,9 +4974,12 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
ret = -EFSCORRUPTED;
goto bad_inode;
}
if ((err_str = check_igot_inode(inode, flags)) != NULL) {
ext4_error_inode(inode, function, line, 0, err_str);
ret = -EFSCORRUPTED;
@ -5131,11 +5168,10 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err;
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
sb_rdonly(inode->i_sb))
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (EXT4_SB(inode->i_sb)->s_journal) {
@ -5255,7 +5291,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
const unsigned int ia_valid = attr->ia_valid;
bool inc_ivers = true;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (unlikely(IS_IMMUTABLE(inode)))
@ -5674,7 +5710,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
put_bh(iloc->bh);
return -EIO;
}
@ -5700,7 +5736,7 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
err = ext4_get_inode_loc(inode, iloc);


@ -802,7 +802,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags)
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
return -EINVAL;
if (ext4_forced_shutdown(sbi))
if (ext4_forced_shutdown(sb))
return 0;
ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);


@ -874,7 +874,7 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *iter, *grp;
struct ext4_group_info *iter;
int i;
if (ac->ac_status == AC_STATUS_FOUND)
@ -883,7 +883,6 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED))
atomic_inc(&sbi->s_bal_p2_aligned_bad_suggestions);
grp = NULL;
for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) {
if (list_empty(&sbi->s_mb_largest_free_orders[i]))
continue;
@ -892,28 +891,22 @@ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context
read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
continue;
}
grp = NULL;
list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
bb_largest_free_order_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[CR_POWER2_ALIGNED]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, CR_POWER2_ALIGNED))) {
grp = iter;
break;
*group = iter->bb_group;
ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED;
read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
return;
}
}
read_unlock(&sbi->s_mb_largest_free_orders_locks[i]);
if (grp)
break;
}
if (!grp) {
/* Increment cr and search again */
*new_cr = CR_GOAL_LEN_FAST;
} else {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED;
}
/* Increment cr and search again if no group is found */
*new_cr = CR_GOAL_LEN_FAST;
}
/*
@ -966,16 +959,25 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len);
i < MB_NUM_ORDERS(ac->ac_sb); i++) {
grp = ext4_mb_find_good_group_avg_frag_lists(ac, i);
if (grp)
break;
if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED;
return;
}
}
if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED;
} else {
/*
* CR_BEST_AVAIL_LEN works based on the concept that we have
* a larger normalized goal len request which can be trimmed to
* a smaller goal len such that it can still satisfy original
* request len. However, allocation request for non-regular
* files never gets normalized.
* See function ext4_mb_normalize_request() (EXT4_MB_HINT_DATA).
*/
if (ac->ac_flags & EXT4_MB_HINT_DATA)
*new_cr = CR_BEST_AVAIL_LEN;
}
else
*new_cr = CR_GOAL_LEN_SLOW;
}
/*
@ -1051,18 +1053,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
ac->ac_g_ex.fe_len);
grp = ext4_mb_find_good_group_avg_frag_lists(ac, frag_order);
if (grp)
break;
if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
return;
}
}
if (grp) {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED;
} else {
/* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */
ac->ac_g_ex.fe_len = ac->ac_orig_goal_len;
*new_cr = CR_GOAL_LEN_SLOW;
}
/* Reset goal length to original goal length before falling into CR_GOAL_LEN_SLOW */
ac->ac_g_ex.fe_len = ac->ac_orig_goal_len;
*new_cr = CR_GOAL_LEN_SLOW;
}
static inline int should_optimize_scan(struct ext4_allocation_context *ac)
@ -1080,8 +1080,9 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
* Return next linear group for allocation. If linear traversal should not be
* performed, this function just returns the same group
*/
static int
next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
static ext4_group_t
next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
ext4_group_t ngroups)
{
if (!should_optimize_scan(ac))
goto inc_and_return;
@ -1255,7 +1256,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
int blocksize;
unsigned int blocksize;
int blocks_per_page;
int groups_per_page;
int err = 0;
@ -2450,7 +2451,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
break;
}
if (ac->ac_criteria < CR_FAST) {
if (!ext4_mb_cr_expensive(ac->ac_criteria)) {
/*
* In CR_GOAL_LEN_FAST and CR_BEST_AVAIL_LEN, we are
* sure that this group will have a large enough
@ -2553,7 +2554,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < CR_POWER2_ALIGNED || cr >= EXT4_MB_NUM_CRS);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp))
if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return false;
free = grp->bb_free;
@ -2634,7 +2635,12 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
free = grp->bb_free;
if (free == 0)
goto out;
if (cr <= CR_FAST && free < ac->ac_g_ex.fe_len)
/*
* In all criteria except CR_ANY_FREE we try to avoid groups that
* can't possibly satisfy the full goal request due to insufficient
* free blocks.
*/
if (cr < CR_ANY_FREE && free < ac->ac_g_ex.fe_len)
goto out;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
goto out;
@ -2658,7 +2664,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
* sure we locate metadata blocks in the first block group in
* the flex_bg if possible.
*/
if (cr < CR_FAST &&
if (!ext4_mb_cr_expensive(cr) &&
(!sbi->s_log_groups_per_flex ||
((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
!(ext4_has_group_desc_csum(sb) &&
@ -2787,8 +2793,8 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
/*
* ac->ac_2order is set only if the fe_len is a power of 2
* if ac->ac_2order is set we also set criteria to 0 so that we
* try exact allocation using buddy.
* if ac->ac_2order is set we also set criteria to CR_POWER2_ALIGNED
* so that we try exact allocation using buddy.
*/
i = fls(ac->ac_g_ex.fe_len);
ac->ac_2order = 0;
@ -2800,10 +2806,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* requests up to the maximum buddy size we have constructed.
*/
if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) {
/*
* This should tell if fe_len is exactly power of 2
*/
if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0)
if (is_power_of_2(ac->ac_g_ex.fe_len))
ac->ac_2order = array_index_nospec(i - 1,
MB_NUM_ORDERS(sb));
}
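
The open-coded mask test replaced above and is_power_of_2() agree for the nonzero lengths seen here; the underlying bit identity, as a tiny self-contained check:

#include <stdbool.h>
#include <stdio.h>

/* A nonzero n is a power of two iff exactly one bit is set, i.e.
 * clearing the lowest set bit (n & (n - 1)) leaves zero. */
static bool pow2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

int main(void)
{
	printf("%d %d %d\n", pow2(8), pow2(12), pow2(1)); /* 1 0 1 */
	return 0;
}
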
@ -2848,11 +2851,11 @@ repeat:
/*
* Batch reads of the block allocation bitmaps
* to get multiple READs in flight; limit
* prefetching at cr=0/1, otherwise mballoc can
* spend a lot of time loading imperfect groups
* prefetching at inexpensive CR, otherwise mballoc
* can spend a lot of time loading imperfect groups
*/
if ((prefetch_grp == group) &&
(cr >= CR_FAST ||
(ext4_mb_cr_expensive(cr) ||
prefetch_ios < sbi->s_mb_prefetch_limit)) {
nr = sbi->s_mb_prefetch;
if (ext4_has_feature_flex_bg(sb)) {
@ -3501,11 +3504,10 @@ static void ext4_discard_work(struct work_struct *work)
struct super_block *sb = sbi->s_sb;
struct ext4_free_data *fd, *nfd;
struct ext4_buddy e4b;
struct list_head discard_list;
LIST_HEAD(discard_list);
ext4_group_t grp, load_grp;
int err = 0;
INIT_LIST_HEAD(&discard_list);
spin_lock(&sbi->s_md_lock);
list_splice_init(&sbi->s_discard_list, &discard_list);
spin_unlock(&sbi->s_md_lock);
@ -3879,12 +3881,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_free_data *entry, *tmp;
struct list_head freed_data_list;
LIST_HEAD(freed_data_list);
struct list_head *cut_pos = NULL;
bool wake;
INIT_LIST_HEAD(&freed_data_list);
spin_lock(&sbi->s_md_lock);
list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
if (entry->efd_tid != commit_tid)
@ -4084,7 +4084,7 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t group;
ext4_grpblk_t blkoff;
int i, err;
int i, err = 0;
int already;
unsigned int clen, clen_changed, thisgrp_len;
@ -4222,12 +4222,13 @@ ext4_mb_pa_rb_next_iter(ext4_lblk_t new_start, ext4_lblk_t cur_start, struct rb_
static inline void
ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac,
ext4_lblk_t start, ext4_lblk_t end)
ext4_lblk_t start, loff_t end)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_prealloc_space *tmp_pa;
ext4_lblk_t tmp_pa_start, tmp_pa_end;
ext4_lblk_t tmp_pa_start;
loff_t tmp_pa_end;
struct rb_node *iter;
read_lock(&ei->i_prealloc_lock);
@ -4236,7 +4237,7 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac,
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
tmp_pa_start = tmp_pa->pa_lstart;
tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
tmp_pa_end = pa_logical_end(sbi, tmp_pa);
spin_lock(&tmp_pa->pa_lock);
if (tmp_pa->pa_deleted == 0)
@ -4258,14 +4259,14 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac,
*/
static inline void
ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,
ext4_lblk_t *start, ext4_lblk_t *end)
ext4_lblk_t *start, loff_t *end)
{
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *tmp_pa = NULL, *left_pa = NULL, *right_pa = NULL;
struct rb_node *iter;
ext4_lblk_t new_start, new_end;
ext4_lblk_t tmp_pa_start, tmp_pa_end, left_pa_end = -1, right_pa_start = -1;
ext4_lblk_t new_start, tmp_pa_start, right_pa_start = -1;
loff_t new_end, tmp_pa_end, left_pa_end = -1;
new_start = *start;
new_end = *end;
@ -4284,7 +4285,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
tmp_pa_start = tmp_pa->pa_lstart;
tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
tmp_pa_end = pa_logical_end(sbi, tmp_pa);
/* PA must not overlap original request */
spin_lock(&tmp_pa->pa_lock);
@ -4364,8 +4365,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,
}
if (left_pa) {
left_pa_end =
left_pa->pa_lstart + EXT4_C2B(sbi, left_pa->pa_len);
left_pa_end = pa_logical_end(sbi, left_pa);
BUG_ON(left_pa_end > ac->ac_o_ex.fe_logical);
}
@ -4404,8 +4404,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_super_block *es = sbi->s_es;
int bsbits, max;
ext4_lblk_t end;
loff_t size, start_off;
loff_t size, start_off, end;
loff_t orig_size __maybe_unused;
ext4_lblk_t start;
@ -4432,7 +4431,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* first, let's learn actual file size
* given current request is allocated */
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
size = extent_logical_end(sbi, &ac->ac_o_ex);
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
@ -4766,7 +4765,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
loff_t tmp_pa_end;
struct rb_node *iter;
ext4_fsblk_t goal_block;
@ -4862,9 +4860,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
* pa can possibly satisfy the request hence check if it overlaps
* original logical start and stop searching if it doesn't.
*/
tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
if (ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, tmp_pa)) {
spin_unlock(&tmp_pa->pa_lock);
goto try_group_pa;
}
@ -4984,7 +4980,6 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
n = rb_next(n);
}
return;
}
/*
@ -5180,8 +5175,11 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa = ac->ac_pa;
if (ac->ac_b_ex.fe_len < ac->ac_orig_goal_len) {
int new_bex_start;
int new_bex_end;
struct ext4_free_extent ex = {
.fe_logical = ac->ac_g_ex.fe_logical,
.fe_len = ac->ac_orig_goal_len,
};
loff_t orig_goal_end = extent_logical_end(sbi, &ex);
/* we can't allocate as much as normalizer wants.
* so, found space must get proper lstart
@ -5200,29 +5198,23 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
* still cover original start
* 3. Else, keep the best ex at start of original request.
*/
new_bex_end = ac->ac_g_ex.fe_logical +
EXT4_C2B(sbi, ac->ac_orig_goal_len);
new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (ac->ac_o_ex.fe_logical >= new_bex_start)
ex.fe_len = ac->ac_b_ex.fe_len;
ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len);
if (ac->ac_o_ex.fe_logical >= ex.fe_logical)
goto adjust_bex;
new_bex_start = ac->ac_g_ex.fe_logical;
new_bex_end =
new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
if (ac->ac_o_ex.fe_logical < new_bex_end)
ex.fe_logical = ac->ac_g_ex.fe_logical;
if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
goto adjust_bex;
new_bex_start = ac->ac_o_ex.fe_logical;
new_bex_end =
new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
ex.fe_logical = ac->ac_o_ex.fe_logical;
adjust_bex:
ac->ac_b_ex.fe_logical = new_bex_start;
ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical +
EXT4_C2B(sbi, ac->ac_orig_goal_len)));
BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
pa->pa_lstart = ac->ac_b_ex.fe_logical;
@ -5419,7 +5411,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
struct list_head list;
LIST_HEAD(list);
struct ext4_buddy e4b;
struct ext4_inode_info *ei;
int err;
@ -5448,7 +5440,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
goto out_dbg;
}
INIT_LIST_HEAD(&list);
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
&grp->bb_prealloc_list, pa_group_list) {
@ -5529,7 +5520,7 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
ext4_group_t group = 0;
struct list_head list;
LIST_HEAD(list);
struct ext4_buddy e4b;
struct rb_node *iter;
int err;
@ -5546,8 +5537,6 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
trace_ext4_discard_preallocations(inode,
atomic_read(&ei->i_prealloc_active), needed);
INIT_LIST_HEAD(&list);
if (needed == 0)
needed = UINT_MAX;
@ -5671,7 +5660,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
{
ext4_group_t i, ngroups;
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(sb))
return;
ngroups = ext4_get_groups_count(sb);
@ -5705,7 +5694,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(sb))
return;
mb_debug(sb, "Can't allocate:"
@ -5738,12 +5727,10 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
#else
static inline void ext4_mb_show_pa(struct super_block *sb)
{
return;
}
static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
ext4_mb_show_pa(ac->ac_sb);
return;
}
#endif
@ -5769,7 +5756,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
group_pa_eligible = sbi->s_mb_group_prealloc > 0;
inode_pa_eligible = true;
size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
size = extent_logical_end(sbi, &ac->ac_o_ex);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
@ -5865,13 +5852,11 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
{
ext4_group_t group = 0;
struct ext4_buddy e4b;
struct list_head discard_list;
LIST_HEAD(discard_list);
struct ext4_prealloc_space *pa, *tmp;
mb_debug(sb, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
pa_node.lg_list,
@ -5984,12 +5969,9 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
spin_unlock(&lg->lg_prealloc_lock);
/* Now trim the list to be not more than 8 elements */
if (lg_prealloc_count > 8) {
if (lg_prealloc_count > 8)
ext4_mb_discard_lg_preallocations(sb, lg,
order, lg_prealloc_count);
return;
}
return ;
}
/*
@ -6102,7 +6084,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp)
ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
ext4_grpblk_t i = 0;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct ext4_super_block *es = sbi->s_es;
goal = ar->goal;
if (goal < le32_to_cpu(es->s_first_data_block) ||
@ -6643,7 +6625,6 @@ do_more:
error_return:
brelse(bitmap_bh);
ext4_std_error(sb, err);
return;
}
/**
@ -6746,7 +6727,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
ext4_mb_clear_bb(handle, inode, block, count, flags);
return;
}
/**
@ -6936,8 +6916,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
void *bitmap;
bitmap = e4b->bd_bitmap;
start = (e4b->bd_info->bb_first_free > start) ?
e4b->bd_info->bb_first_free : start;
start = max(e4b->bd_info->bb_first_free, start);
count = 0;
free_count = 0;
@ -7154,8 +7133,7 @@ ext4_mballoc_query_range(
ext4_lock_group(sb, group);
start = (e4b.bd_info->bb_first_free > start) ?
e4b.bd_info->bb_first_free : start;
start = max(e4b.bd_info->bb_first_free, start);
if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;


@ -233,6 +233,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
static inline loff_t extent_logical_end(struct ext4_sb_info *sbi,
struct ext4_free_extent *fex)
{
/* Use loff_t to avoid end exceeding ext4_lblk_t max. */
return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len);
}
static inline loff_t pa_logical_end(struct ext4_sb_info *sbi,
struct ext4_prealloc_space *pa)
{
/* Use loff_t to avoid end exceeding ext4_lblk_t max. */
return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len);
}
typedef int (*ext4_mballoc_query_range_fn)(
struct super_block *sb,
ext4_group_t agno,
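
The loff_t casts in these helpers guard against 32-bit wraparound: ext4_lblk_t is a 32-bit logical block number, so start plus length can pass 2^32 and wrap for ranges near the end of the largest possible file. A small demonstration with made-up values:

#include <inttypes.h>
#include <stdio.h>

typedef uint32_t ext4_lblk_t; /* 32-bit logical block number, as in ext4 */

int main(void)
{
	ext4_lblk_t lstart = 0xfffff000; /* range starting near the 2^32 limit */
	ext4_lblk_t len = 0x2000;        /* 8192 blocks */

	ext4_lblk_t wrapped = lstart + len;      /* wraps around to 0x1000 */
	long long end = (long long)lstart + len; /* 0x100001000, correct */

	printf("u32 end: %#" PRIx32 ", widened end: %#llx\n", wrapped, end);
	return 0;
}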


@ -162,7 +162,7 @@ static int kmmpd(void *data)
memcpy(mmp->mmp_nodename, init_utsname()->nodename,
sizeof(mmp->mmp_nodename));
while (!kthread_should_stop() && !sb_rdonly(sb)) {
while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) {
if (!ext4_has_feature_mmp(sb)) {
ext4_warning(sb, "kmmpd being stopped since MMP feature"
" has been disabled.");


@ -1445,7 +1445,7 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct dx_hash_info *hinfo = &name->hinfo;
int len;
if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
if (!IS_CASEFOLDED(dir) ||
(IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
cf_name->name = NULL;
return 0;
@ -1496,7 +1496,7 @@ static bool ext4_match(struct inode *parent,
#endif
#if IS_ENABLED(CONFIG_UNICODE)
if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
@ -2393,7 +2393,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
#if IS_ENABLED(CONFIG_UNICODE)
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
@ -2799,6 +2799,7 @@ static int ext4_add_nondir(handle_t *handle,
return err;
}
drop_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
return err;
@ -3142,7 +3143,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
/* Initialize quotas before so that eventual writes go in
@ -3302,7 +3303,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
int retval;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
trace_ext4_unlink_enter(dir, dentry);
@ -3370,7 +3371,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct fscrypt_str disk_link;
int retries = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
@ -3437,6 +3438,7 @@ retry:
err_drop_inode:
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
if (handle)
@ -4021,6 +4023,7 @@ end_rename:
ext4_resetent(handle, &old,
old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
ext4_mark_inode_dirty(handle, whiteout);
ext4_orphan_add(handle, whiteout);
}
unlock_new_inode(whiteout);
@ -4187,7 +4190,7 @@ static int ext4_rename2(struct mnt_idmap *idmap,
{
int err;
if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
return -EIO;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))


@ -184,7 +184,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
io_end->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "


@ -434,6 +434,57 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
#define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
#define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
/*
* The ext4_maybe_update_superblock() function checks and updates the
* superblock if needed.
*
* This function is designed to update the on-disk superblock only under
* certain conditions to prevent excessive disk writes and unnecessary
* waking of the disk from sleep. The superblock will be updated if:
* 1. More than an hour has passed since the last superblock update, and
* 2. More than 16MB have been written since the last superblock update.
*
* @sb: The superblock
*/
static void ext4_maybe_update_superblock(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
journal_t *journal = sbi->s_journal;
time64_t now;
__u64 last_update;
__u64 lifetime_write_kbytes;
__u64 diff_size;
if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
!journal || (journal->j_flags & JBD2_UNMOUNT))
return;
now = ktime_get_real_seconds();
last_update = ext4_get_tstamp(es, s_wtime);
if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
return;
lifetime_write_kbytes = sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
sbi->s_sectors_written_start) >> 1);
/* Get the number of kilobytes not written to disk to account
* for statistics and compare with a multiple of 16 MB. This
* is used to determine when the next superblock commit should
* occur (i.e. not more often than once per 16MB if there was
* less written in an hour).
*/
diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
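
As a worked example of the thresholds above (a userspace sketch, not the kernel function): part_stat sector counts are in 512-byte units, so a right shift by one converts them to KiB, and 16384 KiB is the 16MB write threshold.

#include <stdint.h>
#include <stdio.h>

#define SB_REFRESH_INTERVAL_SEC 3600  /* 1 hour, as in the patch */
#define SB_REFRESH_INTERVAL_KB 16384  /* 16 MiB, as in the patch */

/* Mirror of the two conditions: enough time elapsed AND enough data
 * written since the on-disk superblock last recorded s_kbytes_written. */
static int should_update_sb(int64_t now, int64_t last_update,
			    uint64_t sectors_written, uint64_t sb_kbytes_written)
{
	uint64_t lifetime_kb = sectors_written >> 1; /* 512B sectors -> KiB */

	if (now - last_update < SB_REFRESH_INTERVAL_SEC)
		return 0;
	return (lifetime_kb - sb_kbytes_written) > SB_REFRESH_INTERVAL_KB;
}

int main(void)
{
	/* Two hours elapsed, 32 MiB written since the last update -> 1. */
	printf("%d\n", should_update_sb(7200, 0, 65536, 0));
	return 0;
}
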
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
@ -460,6 +511,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
BUG_ON(txn->t_state == T_FINISHED);
ext4_process_freed_data(sb, txn->t_tid);
ext4_maybe_update_superblock(sb);
spin_lock(&sbi->s_md_lock);
while (!list_empty(&txn->t_private_list)) {
@ -658,7 +710,7 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
WARN_ON_ONCE(1);
if (!continue_fs && !sb_rdonly(sb)) {
ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
if (journal)
jbd2_journal_abort(journal, -EIO);
}
@ -672,7 +724,7 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
* defer superblock flushing to a workqueue.
*/
if (continue_fs && journal)
schedule_work(&EXT4_SB(sb)->s_error_work);
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
else
ext4_commit_super(sb);
}
@ -699,10 +751,10 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
sb->s_flags |= SB_RDONLY;
}
static void flush_stashed_error_work(struct work_struct *work)
static void update_super_work(struct work_struct *work)
{
struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
s_error_work);
s_sb_upd_work);
journal_t *journal = sbi->s_journal;
handle_t *handle;
@ -716,6 +768,7 @@ static void flush_stashed_error_work(struct work_struct *work)
*/
if (!sb_rdonly(sbi->s_sb) && journal) {
struct buffer_head *sbh = sbi->s_sbh;
bool call_notify_err = false;
handle = jbd2_journal_start(journal, 1);
if (IS_ERR(handle))
goto write_directly;
@ -723,6 +776,10 @@ static void flush_stashed_error_work(struct work_struct *work)
jbd2_journal_stop(handle);
goto write_directly;
}
if (sbi->s_add_error_count > 0)
call_notify_err = true;
ext4_update_super(sbi->s_sb);
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
@ -736,7 +793,10 @@ static void flush_stashed_error_work(struct work_struct *work)
goto write_directly;
}
jbd2_journal_stop(handle);
ext4_notify_error_sysfs(sbi);
if (call_notify_err)
ext4_notify_error_sysfs(sbi);
return;
}
write_directly:
@ -759,7 +819,7 @@ void __ext4_error(struct super_block *sb, const char *function,
struct va_format vaf;
va_list args;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return;
trace_ext4_error(sb, function, line);
@ -784,7 +844,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
va_list args;
struct va_format vaf;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
trace_ext4_error(inode->i_sb, function, line);
@ -819,7 +879,7 @@ void __ext4_error_file(struct file *file, const char *function,
struct inode *inode = file_inode(file);
char pathname[80], *path;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
trace_ext4_error(inode->i_sb, function, line);
@ -899,7 +959,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
char nbuf[16];
const char *errstr;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return;
/* Special case: if the error is EROFS, and we're not already
@ -993,7 +1053,7 @@ __acquires(bitlock)
struct va_format vaf;
va_list args;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return;
trace_ext4_error(sb, function, line);
@ -1019,7 +1079,7 @@ __acquires(bitlock)
if (!bdev_read_only(sb->s_bdev)) {
save_error_info(sb, EFSCORRUPTED, ino, block, function,
line);
schedule_work(&EXT4_SB(sb)->s_error_work);
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
return;
}
@ -1097,26 +1157,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/
}
/*
* Open the external journal device
*/
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
struct block_device *bdev;
bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
&fs_holder_ops);
if (IS_ERR(bdev))
goto fail;
return bdev;
fail:
ext4_msg(sb, KERN_ERR,
"failed to open journal device unknown-block(%u,%u) %ld",
MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
return NULL;
}
static inline struct inode *orphan_list_entry(struct list_head *l)
{
return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@ -1251,10 +1291,10 @@ static void ext4_put_super(struct super_block *sb)
* Unregister sysfs before destroying jbd2 journal.
* Since we could still access attr_journal_task attribute via sysfs
* path which could have sbi->s_journal->j_task as NULL
* Unregister sysfs before flush sbi->s_error_work.
* Unregister sysfs before flush sbi->s_sb_upd_work.
* Since a user may read /proc/fs/ext4/xx/mb_groups during umount, a
* failed metadata read verification there will queue the error work.
* flush_stashed_error_work will call start_this_handle, which may
* update_super_work will call start_this_handle, which may
* trigger a BUG_ON.
*/
ext4_unregister_sysfs(sb);
@ -1266,7 +1306,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_unregister_li_request(sb);
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
flush_work(&sbi->s_error_work);
flush_work(&sbi->s_sb_upd_work);
destroy_workqueue(sbi->rsv_conversion_wq);
ext4_release_orphan_info(sb);
@ -1875,6 +1915,7 @@ static const struct mount_opts {
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
#endif
{Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
{Opt_err, 0, 0}
};
@ -1943,8 +1984,6 @@ struct ext4_fs_context {
unsigned int mask_s_mount_opt;
unsigned int vals_s_mount_opt2;
unsigned int mask_s_mount_opt2;
unsigned long vals_s_mount_flags;
unsigned long mask_s_mount_flags;
unsigned int opt_flags; /* MOPT flags */
unsigned int spec;
u32 s_max_batch_time;
@ -2095,12 +2134,6 @@ EXT4_SET_CTX(mount_opt2);
EXT4_CLEAR_CTX(mount_opt2);
EXT4_TEST_CTX(mount_opt2);
static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
{
set_bit(bit, &ctx->mask_s_mount_flags);
set_bit(bit, &ctx->vals_s_mount_flags);
}
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct ext4_fs_context *ctx = fc->fs_private;
@ -2164,9 +2197,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
param->key);
return 0;
case Opt_abort:
ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
return 0;
case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
ctx_set_flags(ctx, SB_INLINECRYPT);
@ -2820,8 +2850,6 @@ static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
sbi->s_mount_opt |= ctx->vals_s_mount_opt;
sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
sbi->s_mount_flags |= ctx->vals_s_mount_flags;
sb->s_flags &= ~ctx->mask_s_flags;
sb->s_flags |= ctx->vals_s_flags;
@ -4210,7 +4238,7 @@ int ext4_calculate_overhead(struct super_block *sb)
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
if (j_inode) {
if (!IS_ERR(j_inode)) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
overhead += EXT4_NUM_B2C(sbi, j_blocks);
iput(j_inode);
@ -4948,8 +4976,8 @@ static int ext4_load_and_init_journal(struct super_block *sb,
return 0;
out:
/* flush s_error_work before journal destroy. */
flush_work(&sbi->s_error_work);
/* flush s_sb_upd_work before destroying the journal. */
flush_work(&sbi->s_sb_upd_work);
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
return -EINVAL;
@ -5272,7 +5300,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
if (err)
@ -5615,16 +5643,16 @@ failed_mount_wq:
sbi->s_ea_block_cache = NULL;
if (sbi->s_journal) {
/* flush s_error_work before journal destroy. */
flush_work(&sbi->s_error_work);
/* flush s_sb_upd_work before destroying the journal. */
flush_work(&sbi->s_sb_upd_work);
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
}
failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
/* flush s_error_work before sbi destroy */
flush_work(&sbi->s_error_work);
/* flush s_sb_upd_work before destroying sbi */
flush_work(&sbi->s_sb_upd_work);
del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
ext4_group_desc_free(sbi);
@ -5751,22 +5779,22 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
return NULL;
return ERR_CAST(journal_inode);
}
if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
ext4_msg(sb, KERN_ERR, "journal inode is deleted");
return NULL;
return ERR_PTR(-EFSCORRUPTED);
}
if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return ERR_PTR(-EFSCORRUPTED);
}
ext4_debug("Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return NULL;
}
return journal_inode;
}
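ext4_get_journal_inode() now fails with the kernel's ERR_PTR convention instead
of NULL, so callers receive a precise errno. A minimal sketch of that
convention (illustrative values):

	struct inode *inode = ERR_PTR(-EFSCORRUPTED);	/* errno packed into the pointer */
	if (IS_ERR(inode))				/* true for values in the errno range */
		pr_debug("error: %ld\n", PTR_ERR(inode));	/* unpacks the errno again */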
@ -5792,24 +5820,21 @@ static int ext4_journal_bmap(journal_t *journal, sector_t *block)
return 0;
}
static journal_t *ext4_get_journal(struct super_block *sb,
unsigned int journal_inum)
static journal_t *ext4_open_inode_journal(struct super_block *sb,
unsigned int journal_inum)
{
struct inode *journal_inode;
journal_t *journal;
if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
return NULL;
journal_inode = ext4_get_journal_inode(sb, journal_inum);
if (!journal_inode)
return NULL;
if (IS_ERR(journal_inode))
return ERR_CAST(journal_inode);
journal = jbd2_journal_init_inode(journal_inode);
if (!journal) {
if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "Could not load journal inode");
iput(journal_inode);
return NULL;
return ERR_CAST(journal);
}
journal->j_private = sb;
journal->j_bmap = ext4_journal_bmap;
@ -5817,43 +5842,47 @@ static journal_t *ext4_get_journal(struct super_block *sb,
return journal;
}
static journal_t *ext4_get_dev_journal(struct super_block *sb,
dev_t j_dev)
static struct block_device *ext4_get_journal_blkdev(struct super_block *sb,
dev_t j_dev, ext4_fsblk_t *j_start,
ext4_fsblk_t *j_len)
{
struct buffer_head *bh;
journal_t *journal;
ext4_fsblk_t start;
ext4_fsblk_t len;
struct block_device *bdev;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
struct ext4_super_block *es;
struct block_device *bdev;
if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
return NULL;
int errno;
/* see get_tree_bdev() for why this is needed and safe */
up_write(&sb->s_umount);
bdev = ext4_blkdev_get(j_dev, sb);
bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
&fs_holder_ops);
down_write(&sb->s_umount);
if (bdev == NULL)
return NULL;
if (IS_ERR(bdev)) {
ext4_msg(sb, KERN_ERR,
"failed to open journal device unknown-block(%u,%u) %ld",
MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev));
return ERR_CAST(bdev);
}
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
ext4_msg(sb, KERN_ERR,
"blocksize too small for journal device");
errno = -EINVAL;
goto out_bdev;
}
sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
offset = EXT4_MIN_BLOCK_SIZE % blocksize;
set_blocksize(bdev, blocksize);
if (!(bh = __bread(bdev, sb_block, blocksize))) {
bh = __bread(bdev, sb_block, blocksize);
if (!bh) {
ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
"external journal");
errno = -EINVAL;
goto out_bdev;
}
@ -5861,57 +5890,74 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
ext4_msg(sb, KERN_ERR, "external journal has "
"bad superblock");
brelse(bh);
goto out_bdev;
ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
errno = -EFSCORRUPTED;
goto out_bh;
}
if ((le32_to_cpu(es->s_feature_ro_compat) &
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
es->s_checksum != ext4_superblock_csum(sb, es)) {
ext4_msg(sb, KERN_ERR, "external journal has "
"corrupt superblock");
brelse(bh);
goto out_bdev;
ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
errno = -EFSCORRUPTED;
goto out_bh;
}
if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
ext4_msg(sb, KERN_ERR, "journal UUID does not match");
brelse(bh);
goto out_bdev;
errno = -EFSCORRUPTED;
goto out_bh;
}
len = ext4_blocks_count(es);
start = sb_block + 1;
brelse(bh); /* we're done with the superblock */
*j_start = sb_block + 1;
*j_len = ext4_blocks_count(es);
brelse(bh);
return bdev;
journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
start, len, blocksize);
if (!journal) {
out_bh:
brelse(bh);
out_bdev:
blkdev_put(bdev, sb);
return ERR_PTR(errno);
}
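The sb_block/offset arithmetic above locates the external journal's superblock,
which sits EXT4_MIN_BLOCK_SIZE (1024) bytes into the device regardless of the
filesystem block size. A worked example, assuming a 4096-byte block size:

	sb_block = 1024 / 4096;		/* 0: the superblock lives inside block 0 */
	offset   = 1024 % 4096;		/* 1024: one KiB into that block          */
	/* es is then read at bh->b_data + offset (that line falls outside the hunk) */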
static journal_t *ext4_open_dev_journal(struct super_block *sb,
dev_t j_dev)
{
journal_t *journal;
ext4_fsblk_t j_start;
ext4_fsblk_t j_len;
struct block_device *journal_bdev;
int errno = 0;
journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
if (IS_ERR(journal_bdev))
return ERR_CAST(journal_bdev);
journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start,
j_len, sb->s_blocksize);
if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "failed to create device journal");
errno = PTR_ERR(journal);
goto out_bdev;
}
journal->j_private = sb;
if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
ext4_msg(sb, KERN_ERR, "I/O error on journal device");
goto out_journal;
}
if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
ext4_msg(sb, KERN_ERR, "External journal has more than one "
"user (unsupported) - %d",
be32_to_cpu(journal->j_superblock->s_nr_users));
errno = -EINVAL;
goto out_journal;
}
EXT4_SB(sb)->s_journal_bdev = bdev;
journal->j_private = sb;
EXT4_SB(sb)->s_journal_bdev = journal_bdev;
ext4_init_journal_params(sb, journal);
return journal;
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
blkdev_put(bdev, sb);
return NULL;
blkdev_put(journal_bdev, sb);
return ERR_PTR(errno);
}
static int ext4_load_journal(struct super_block *sb,
@ -5943,13 +5989,13 @@ static int ext4_load_journal(struct super_block *sb,
}
if (journal_inum) {
journal = ext4_get_journal(sb, journal_inum);
if (!journal)
return -EINVAL;
journal = ext4_open_inode_journal(sb, journal_inum);
if (IS_ERR(journal))
return PTR_ERR(journal);
} else {
journal = ext4_get_dev_journal(sb, journal_dev);
if (!journal)
return -EINVAL;
journal = ext4_open_dev_journal(sb, journal_dev);
if (IS_ERR(journal))
return PTR_ERR(journal);
}
journal_dev_ro = bdev_read_only(journal->j_dev);
@ -6066,7 +6112,7 @@ static void ext4_update_super(struct super_block *sb)
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
if (!(sb->s_flags & SB_RDONLY))
if (!sb_rdonly(sb))
ext4_update_tstamp(es, s_wtime);
es->s_kbytes_written =
cpu_to_le64(sbi->s_kbytes_written +
@ -6264,13 +6310,7 @@ static int ext4_clear_journal_err(struct super_block *sb,
*/
int ext4_force_commit(struct super_block *sb)
{
journal_t *journal;
if (sb_rdonly(sb))
return 0;
journal = EXT4_SB(sb)->s_journal;
return ext4_journal_force_commit(journal);
return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
}
static int ext4_sync_fs(struct super_block *sb, int wait)
@ -6280,7 +6320,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
bool needs_barrier = false;
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(sb)))
return 0;
trace_ext4_sync_fs(sb, wait);
@ -6329,12 +6369,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
static int ext4_freeze(struct super_block *sb)
{
int error = 0;
journal_t *journal;
if (sb_rdonly(sb))
return 0;
journal = EXT4_SB(sb)->s_journal;
journal_t *journal = EXT4_SB(sb)->s_journal;
if (journal) {
/* Now we set up the journal barrier. */
@ -6368,7 +6403,7 @@ out:
*/
static int ext4_unfreeze(struct super_block *sb)
{
if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
if (ext4_forced_shutdown(sb))
return 0;
if (EXT4_SB(sb)->s_journal) {
@ -6484,7 +6519,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
goto restore_opts;
}
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
if (test_opt2(sb, ABORT))
ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
@ -6498,10 +6533,10 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
/* Flush outstanding errors before changing fs state */
flush_work(&sbi->s_error_work);
flush_work(&sbi->s_sb_upd_work);
if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
if (ext4_forced_shutdown(sb)) {
err = -EROFS;
goto restore_opts;
}
@ -6662,7 +6697,7 @@ restore_opts:
* If there was a failing r/w to ro transition, we may need to
* re-enable quota
*/
if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
@ -7071,6 +7106,13 @@ static int ext4_quota_off(struct super_block *sb, int type)
err = dquot_quota_off(sb, type);
if (err || ext4_has_feature_quota(sb))
goto out_put;
/*
* If the filesystem was remounted read-only first, we cannot clean up
* inode flags here. Bad luck, but people should be using the QUOTA feature
* these days anyway.
*/
if (sb_rdonly(sb))
goto out_put;
inode_lock(inode);
/*

fs/ext4/xattr.c

@ -701,7 +701,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
{
int error;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (strlen(name) > 255)

fs/jbd2/checkpoint.c

@ -40,18 +40,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
}
}
/*
* Check whether a checkpoint buffer can be released.
*
* Requires j_list_lock
*/
static inline bool __cp_buffer_busy(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
}
/*
* __jbd2_log_wait_for_space: wait until there is space in the journal.
*
@ -349,6 +337,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
/*
* journal_shrink_one_cp_list
*
@ -360,7 +350,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
bool destroy, bool *released)
enum shrink_type type,
bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
@ -376,12 +367,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
if (destroy) {
if (type == SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0)
continue;
if (ret < 0) {
if (type == SHRINK_BUSY_SKIP)
continue;
break;
}
}
nr_freed++;
@ -445,7 +439,7 @@ again:
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
false, &released);
SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
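The enum makes the two non-destroy behaviours explicit. Summarizing the
dispatch in journal_shrink_one_cp_list() above (a restatement, not new logic):

	/*
	 * SHRINK_DESTROY:   __jbd2_journal_remove_checkpoint(), unconditional.
	 * SHRINK_BUSY_SKIP: jbd2_journal_try_remove_checkpoint(); on failure,
	 *                   skip the busy buffer and keep scanning (shrinker).
	 * SHRINK_BUSY_STOP: same attempt, but stop at the first busy buffer
	 *                   (checkpoint-list cleanup).
	 */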
@ -485,19 +479,21 @@ out:
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
enum shrink_type type;
bool released;
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
journal_shrink_one_cp_list(transaction->t_checkpoint_list,
destroy, &released);
type, &released);
/*
* This function only frees up some memory if possible, so we
* don't have an obligation to finish processing. Bail out if
@ -631,6 +627,8 @@ int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
if (jh->b_transaction)
return -EBUSY;
if (!trylock_buffer(bh))
return -EBUSY;
if (buffer_dirty(bh)) {
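jbd2_journal_try_remove_checkpoint() now refuses work before touching the
buffer lock. A sketch of the resulting busy checks, in order (the dirty-buffer
branch is assumed from the rest of the function, which the hunk cuts off):

	if (jh->b_transaction)		/* still attached to a transaction */
		return -EBUSY;
	if (!trylock_buffer(bh))	/* buffer lock held elsewhere */
		return -EBUSY;
	if (buffer_dirty(bh)) {		/* not yet written back */
		unlock_buffer(bh);
		return -EBUSY;
	}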

fs/jbd2/journal.c

@ -115,14 +115,6 @@ void __jbd2_debug(int level, const char *file, const char *func,
#endif
/* Checksumming functions */
static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{
if (!jbd2_journal_has_csum_v2or3_feature(j))
return 1;
return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
}
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
__u32 csum;
@ -1333,6 +1325,189 @@ static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
return count;
}
/*
* If the journal init or create aborts, we need to mark the journal
* superblock as being NULL to prevent the journal destroy from writing
* back a bogus superblock.
*/
static void journal_fail_superblock(journal_t *journal)
{
struct buffer_head *bh = journal->j_sb_buffer;
brelse(bh);
journal->j_sb_buffer = NULL;
}
/*
* Check the superblock for a given journal, performing initial
* validation of the format.
*/
static int journal_check_superblock(journal_t *journal)
{
journal_superblock_t *sb = journal->j_superblock;
int num_fc_blks;
int err = -EINVAL;
if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
return err;
}
if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
return err;
}
if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
printk(KERN_WARNING "JBD2: journal file too short\n");
return err;
}
if (be32_to_cpu(sb->s_first) == 0 ||
be32_to_cpu(sb->s_first) >= journal->j_total_len) {
printk(KERN_WARNING
"JBD2: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));
return err;
}
/*
* If this is a V2 superblock, then we have to check the
* feature flags on it.
*/
if (!jbd2_format_support_feature(journal))
return 0;
if ((sb->s_feature_ro_compat &
~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
(sb->s_feature_incompat &
~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
printk(KERN_WARNING "JBD2: Unrecognised features on journal\n");
return err;
}
num_fc_blks = jbd2_has_feature_fast_commit(journal) ?
jbd2_journal_get_num_fc_blks(sb) : 0;
if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS ||
be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) {
printk(KERN_ERR "JBD2: journal file too short %u,%d\n",
be32_to_cpu(sb->s_maxlen), num_fc_blks);
return err;
}
if (jbd2_has_feature_csum2(journal) &&
jbd2_has_feature_csum3(journal)) {
/* Can't have checksum v2 and v3 at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
"at the same time!\n");
return err;
}
if (jbd2_journal_has_csum_v2or3_feature(journal) &&
jbd2_has_feature_checksum(journal)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
"at the same time!\n");
return err;
}
/* Load the checksum driver */
if (jbd2_journal_has_csum_v2or3_feature(journal)) {
if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
return err;
}
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
return err;
}
/* Check superblock checksum */
if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
printk(KERN_ERR "JBD2: journal checksum error\n");
err = -EFSBADCRC;
return err;
}
}
return 0;
}
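The final length check keeps room for both the regular log and the fast-commit
area. A worked example, assuming JBD2_MIN_JOURNAL_BLOCKS is 1024 and a journal
advertising 256 fast-commit blocks:

	/* s_maxlen must cover 1024 + 256 = 1280 blocks, so e.g.         */
	/* s_maxlen == 1200 fails: 1200 - 1024 = 176 < 256  ->  -EINVAL  */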
static int journal_revoke_records_per_block(journal_t *journal)
{
int record_size;
int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
if (jbd2_has_feature_64bit(journal))
record_size = 8;
else
record_size = 4;
if (jbd2_journal_has_csum_v2or3(journal))
space -= sizeof(struct jbd2_journal_block_tail);
return space / record_size;
}
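A worked example for journal_revoke_records_per_block(), assuming 4 KiB blocks,
the 64bit feature (8-byte records), v2/v3 checksums, a 16-byte revoke header,
and a 4-byte block tail:

	space = 4096 - 16;	/* 4080 bytes after the revoke header */
	space -= 4;		/* 4076 bytes after the checksum tail */
	/* 4076 / 8 == 509 revoke records per journal block */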
/*
* Load the on-disk journal superblock and read the key fields into the
* journal_t.
*/
static int journal_load_superblock(journal_t *journal)
{
int err;
struct buffer_head *bh;
journal_superblock_t *sb;
bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset,
journal->j_blocksize);
if (bh)
err = bh_read(bh, 0);
if (!bh || err < 0) {
pr_err("%s: Cannot read journal superblock\n", __func__);
brelse(bh);
return -EIO;
}
journal->j_sb_buffer = bh;
sb = (journal_superblock_t *)bh->b_data;
journal->j_superblock = sb;
err = journal_check_superblock(journal);
if (err) {
journal_fail_superblock(journal);
return err;
}
journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
journal->j_tail = be32_to_cpu(sb->s_start);
journal->j_first = be32_to_cpu(sb->s_first);
journal->j_errno = be32_to_cpu(sb->s_errno);
journal->j_last = be32_to_cpu(sb->s_maxlen);
if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
journal->j_total_len = be32_to_cpu(sb->s_maxlen);
/* Precompute checksum seed for all metadata */
if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
journal->j_revoke_records_per_block =
journal_revoke_records_per_block(journal);
if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
journal->j_last = journal->j_fc_last -
jbd2_journal_get_num_fc_blks(sb);
journal->j_fc_first = journal->j_last + 1;
journal->j_fc_off = 0;
}
return 0;
}
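With fast commits enabled, the tail of the on-disk journal is carved off for
fast-commit blocks. A worked layout, assuming s_maxlen == 8192 and 256
fast-commit blocks:

	j_fc_last  = 8192;		/* last block of the journal      */
	j_last     = 8192 - 256;	/* 7936: last regular log block   */
	j_fc_first = 7936 + 1;		/* fast-commit area: 7937..8192   */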
/*
* Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing
@ -1349,12 +1524,21 @@ static journal_t *journal_init_common(struct block_device *bdev,
static struct lock_class_key jbd2_trans_commit_key;
journal_t *journal;
int err;
struct buffer_head *bh;
int n;
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
return NULL;
return ERR_PTR(-ENOMEM);
journal->j_blocksize = blocksize;
journal->j_dev = bdev;
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
journal->j_total_len = len;
err = journal_load_superblock(journal);
if (err)
goto err_cleanup;
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_done_commit);
@ -1367,12 +1551,15 @@ static journal_t *journal_init_common(struct block_device *bdev,
mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock);
spin_lock_init(&journal->j_list_lock);
spin_lock_init(&journal->j_history_lock);
rwlock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0;
journal->j_max_batch_time = 15000; /* 15ms */
atomic_set(&journal->j_reserved_credits, 0);
lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
&jbd2_trans_commit_key, 0);
/* The journal is marked for error until we succeed with recovery! */
journal->j_flags = JBD2_ABORT;
@ -1382,18 +1569,11 @@ static journal_t *journal_init_common(struct block_device *bdev,
if (err)
goto err_cleanup;
spin_lock_init(&journal->j_history_lock);
lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
&jbd2_trans_commit_key, 0);
/* journal descriptor can store up to n blocks -bzzz */
journal->j_blocksize = blocksize;
journal->j_dev = bdev;
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
journal->j_total_len = len;
/* We need enough buffers to write out full descriptor block. */
/*
* The journal descriptor can store up to n blocks; we need enough
* buffers to write out a full descriptor block.
*/
err = -ENOMEM;
n = journal->j_blocksize / jbd2_min_tag_size();
journal->j_wbufsize = n;
journal->j_fc_wbuf = NULL;
@ -1402,37 +1582,30 @@ static journal_t *journal_init_common(struct block_device *bdev,
if (!journal->j_wbuf)
goto err_cleanup;
bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
pr_err("%s: Cannot get buffer for journal superblock\n",
__func__);
err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0,
GFP_KERNEL);
if (err)
goto err_cleanup;
}
journal->j_sb_buffer = bh;
journal->j_superblock = (journal_superblock_t *)bh->b_data;
journal->j_shrink_transaction = NULL;
journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
journal->j_shrinker.seeks = DEFAULT_SEEKS;
journal->j_shrinker.batch = journal->j_max_transaction_buffers;
if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL))
err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
if (err)
goto err_cleanup;
if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) {
percpu_counter_destroy(&journal->j_checkpoint_jh_count);
goto err_cleanup;
}
return journal;
err_cleanup:
brelse(journal->j_sb_buffer);
percpu_counter_destroy(&journal->j_checkpoint_jh_count);
kfree(journal->j_wbuf);
jbd2_journal_destroy_revoke(journal);
journal_fail_superblock(journal);
kfree(journal);
return NULL;
return ERR_PTR(err);
}
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
@ -1465,8 +1638,8 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev,
journal_t *journal;
journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
if (!journal)
return NULL;
if (IS_ERR(journal))
return ERR_CAST(journal);
snprintf(journal->j_devname, sizeof(journal->j_devname),
"%pg", journal->j_dev);
@ -1492,11 +1665,9 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
blocknr = 0;
err = bmap(inode, &blocknr);
if (err || !blocknr) {
pr_err("%s: Cannot locate journal superblock\n",
__func__);
return NULL;
pr_err("%s: Cannot locate journal superblock\n", __func__);
return err ? ERR_PTR(err) : ERR_PTR(-EINVAL);
}
jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
@ -1506,8 +1677,8 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
inode->i_sb->s_blocksize);
if (!journal)
return NULL;
if (IS_ERR(journal))
return ERR_CAST(journal);
journal->j_inode = inode;
snprintf(journal->j_devname, sizeof(journal->j_devname),
@ -1518,18 +1689,6 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
return journal;
}
/*
* If the journal init or create aborts, we need to mark the journal
* superblock as being NULL to prevent the journal destroy from writing
* back a bogus superblock.
*/
static void journal_fail_superblock(journal_t *journal)
{
struct buffer_head *bh = journal->j_sb_buffer;
brelse(bh);
journal->j_sb_buffer = NULL;
}
/*
* Given a journal_t structure, initialise the various fields for
* startup of a new journaling session. We use this both when creating
@ -1886,163 +2045,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
static int journal_revoke_records_per_block(journal_t *journal)
{
int record_size;
int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
if (jbd2_has_feature_64bit(journal))
record_size = 8;
else
record_size = 4;
if (jbd2_journal_has_csum_v2or3(journal))
space -= sizeof(struct jbd2_journal_block_tail);
return space / record_size;
}
/*
* Read the superblock for a given journal, performing initial
* validation of the format.
*/
static int journal_get_superblock(journal_t *journal)
{
struct buffer_head *bh;
journal_superblock_t *sb;
int err;
bh = journal->j_sb_buffer;
J_ASSERT(bh != NULL);
if (buffer_verified(bh))
return 0;
err = bh_read(bh, 0);
if (err < 0) {
printk(KERN_ERR
"JBD2: IO error reading journal superblock\n");
goto out;
}
sb = journal->j_superblock;
err = -EINVAL;
if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
goto out;
}
if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
goto out;
}
if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
printk(KERN_WARNING "JBD2: journal file too short\n");
goto out;
}
if (be32_to_cpu(sb->s_first) == 0 ||
be32_to_cpu(sb->s_first) >= journal->j_total_len) {
printk(KERN_WARNING
"JBD2: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));
goto out;
}
if (jbd2_has_feature_csum2(journal) &&
jbd2_has_feature_csum3(journal)) {
/* Can't have checksum v2 and v3 at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
"at the same time!\n");
goto out;
}
if (jbd2_journal_has_csum_v2or3_feature(journal) &&
jbd2_has_feature_checksum(journal)) {
/* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
}
/* Load the checksum driver */
if (jbd2_journal_has_csum_v2or3_feature(journal)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
goto out;
}
/* Check superblock checksum */
if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
printk(KERN_ERR "JBD2: journal checksum error\n");
err = -EFSBADCRC;
goto out;
}
}
set_buffer_verified(bh);
return 0;
out:
journal_fail_superblock(journal);
return err;
}
/*
* Load the on-disk journal superblock and read the key fields into the
* journal_t.
*/
static int load_superblock(journal_t *journal)
{
int err;
journal_superblock_t *sb;
int num_fc_blocks;
err = journal_get_superblock(journal);
if (err)
return err;
sb = journal->j_superblock;
journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
journal->j_tail = be32_to_cpu(sb->s_start);
journal->j_first = be32_to_cpu(sb->s_first);
journal->j_errno = be32_to_cpu(sb->s_errno);
journal->j_last = be32_to_cpu(sb->s_maxlen);
if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
journal->j_total_len = be32_to_cpu(sb->s_maxlen);
/* Precompute checksum seed for all metadata */
if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
journal->j_revoke_records_per_block =
journal_revoke_records_per_block(journal);
if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
journal->j_last = journal->j_fc_last - num_fc_blocks;
journal->j_fc_first = journal->j_last + 1;
journal->j_fc_off = 0;
}
return 0;
}
/**
* jbd2_journal_load() - Read journal from disk.
* @journal: Journal to act on.
@ -2054,28 +2056,7 @@ static int load_superblock(journal_t *journal)
int jbd2_journal_load(journal_t *journal)
{
int err;
journal_superblock_t *sb;
err = load_superblock(journal);
if (err)
return err;
sb = journal->j_superblock;
/*
* If this is a V2 superblock, then we have to check the
* feature flags on it.
*/
if (jbd2_format_support_feature(journal)) {
if ((sb->s_feature_ro_compat &
~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
(sb->s_feature_incompat &
~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
printk(KERN_WARNING
"JBD2: Unrecognised features on journal\n");
return -EINVAL;
}
}
journal_superblock_t *sb = journal->j_superblock;
/*
* Create a slab for this blocksize
@ -2086,8 +2067,11 @@ int jbd2_journal_load(journal_t *journal)
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
if (jbd2_journal_recover(journal))
goto recovery_error;
err = jbd2_journal_recover(journal);
if (err) {
pr_warn("JBD2: journal recovery failed\n");
return err;
}
if (journal->j_failed_commit) {
printk(KERN_ERR "JBD2: journal transaction %u on %s "
@ -2104,15 +2088,14 @@ int jbd2_journal_load(journal_t *journal)
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */
if (journal_reset(journal))
goto recovery_error;
err = journal_reset(journal);
if (err) {
pr_warn("JBD2: journal reset failed\n");
return err;
}
journal->j_flags |= JBD2_LOADED;
return 0;
recovery_error:
printk(KERN_WARNING "JBD2: recovery failed\n");
return -EIO;
}
/**
@ -2224,8 +2207,6 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
if (!compat && !ro && !incompat)
return 1;
if (journal_get_superblock(journal))
return 0;
if (!jbd2_format_support_feature(journal))
return 0;
@ -2515,16 +2496,12 @@ out:
int jbd2_journal_wipe(journal_t *journal, int write)
{
int err = 0;
int err;
J_ASSERT (!(journal->j_flags & JBD2_LOADED));
err = load_superblock(journal);
if (err)
return err;
if (!journal->j_tail)
goto no_recovery;
return 0;
printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
write ? "Clearing" : "Ignoring");
@ -2537,7 +2514,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
mutex_unlock(&journal->j_checkpoint_mutex);
}
no_recovery:
return err;
}

fs/jbd2/recovery.c

@ -230,12 +230,8 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
/* Make sure we wrap around the log correctly! */
#define wrap(journal, var) \
do { \
unsigned long _wrap_last = \
jbd2_has_feature_fast_commit(journal) ? \
(journal)->j_fc_last : (journal)->j_last; \
\
if (var >= _wrap_last) \
var -= (_wrap_last - (journal)->j_first); \
if (var >= (journal)->j_last) \
var -= ((journal)->j_last - (journal)->j_first); \
} while (0)
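Since recovery no longer scans into the fast-commit area, the wrap point is
always j_last. A worked example, assuming j_first == 1 and j_last == 8192:

	var = 8192;			/* one past the last scannable block */
	if (var >= 8192)
		var -= 8192 - 1;	/* var == 1: wrapped back to j_first */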
static int fc_do_one_pass(journal_t *journal,
@ -524,9 +520,7 @@ static int do_one_pass(journal_t *journal,
break;
jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
next_commit_ID, next_log_block,
jbd2_has_feature_fast_commit(journal) ?
journal->j_fc_last : journal->j_last);
next_commit_ID, next_log_block, journal->j_last);
/* Skip over each chunk of the transaction looking for
* either the next descriptor block or the final commit

fs/libfs.c

@ -1648,16 +1648,6 @@ bool is_empty_dir_inode(struct inode *inode)
}
#if IS_ENABLED(CONFIG_UNICODE)
/*
* Determine if the name of a dentry should be casefolded.
*
* Return: whether names need casefolding
*/
static bool needs_casefold(const struct inode *dir)
{
return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
}
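Dropping the s_encoding test is safe because a filesystem now refuses to load
an inode carrying the casefold flag when the superblock has no encoding, so
these dentry hooks never see such a directory. A hedged sketch of the guard
this relies on (modeled on ext4's inode validation, not code from this hunk):

	if (IS_CASEFOLDED(inode) && !inode->i_sb->s_encoding)
		return -EFSCORRUPTED;	/* rejected at inode load time */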
/**
* generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
* @dentry: dentry whose name we are checking against
@ -1678,7 +1668,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
char strbuf[DNAME_INLINE_LEN];
int ret;
if (!dir || !needs_casefold(dir))
if (!dir || !IS_CASEFOLDED(dir))
goto fallback;
/*
* If the dentry name is stored in-line, then it may be concurrently
@ -1720,7 +1710,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
const struct unicode_map *um = sb->s_encoding;
int ret = 0;
if (!dir || !needs_casefold(dir))
if (!dir || !IS_CASEFOLDED(dir))
return 0;
ret = utf8_casefold_hash(um, dentry, str);

fs/ocfs2/journal.c

@ -908,9 +908,9 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
/* call the kernels journal init function now */
j_journal = jbd2_journal_init_inode(inode);
if (j_journal == NULL) {
if (IS_ERR(j_journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EINVAL;
status = PTR_ERR(j_journal);
goto done;
}
@ -1684,9 +1684,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
journal = jbd2_journal_init_inode(inode);
if (journal == NULL) {
if (IS_ERR(journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EIO;
status = PTR_ERR(journal);
goto done;
}

include/linux/jbd2.h

@ -630,11 +630,6 @@ struct transaction_s
*/
struct list_head t_inode_list;
/*
* Protects info related to handles
*/
spinlock_t t_handle_lock;
/*
* Longest time some handle had to wait for running transaction
*/