Ext4 patches for the 6.10-rc1 merge window:

- more folio conversion patches
  - add support for FS_IOC_GETFSSYSFSPATH
  - mballoc cleaups and add more kunit tests
  - sysfs cleanups and bug fixes
  - miscellaneous bug fixes and cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmZIMBAACgkQ8vlZVpUN
 gaNvhQf9GAdBxpCLc3fc9mW8oP+okAqQ2etpz7Up5PRjX62P8o89QOXBUHSAZxat
 qOpKu5NaUBdz5mfdg/ptbCRdbsLxQTY670nSYhmseOCHZR/cw4jX1f+FUMj0VoFm
 tu/TR285W6A+i7zb1xOgyUsqN8jbQdm4ASmhVjV67oTLs+A6I8loL0wotlAl+K0U
 g8twZbnNfUaB0jrNyhEzr59bTFUgFMjt8Jv9aH3Oi4rjXGzmS5/xqPCK5Lhl+nCW
 gxIfRphwKlw9+c9osLYRrtRFrexFsQMCGmz2z9F4m7SplHI3A/SVaKSHaFeW/jQ0
 gXP/S91zale6tSeu14gZLY2JqwvI0g==
 =XA7v
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:

 - more folio conversion patches

 - add support for FS_IOC_GETFSSYSFSPATH

 - mballoc cleaups and add more kunit tests

 - sysfs cleanups and bug fixes

 - miscellaneous bug fixes and cleanups

* tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: fix error pointer dereference in ext4_mb_load_buddy_gfp()
  jbd2: add prefix 'jbd2' for 'shrink_type'
  jbd2: use shrink_type type instead of bool type for __jbd2_journal_clean_checkpoint_list()
  ext4: fix uninitialized ratelimit_state->lock access in __ext4_fill_super()
  ext4: remove calls to to set/clear the folio error flag
  ext4: propagate errors from ext4_sb_bread() in ext4_xattr_block_cache_find()
  ext4: fix mb_cache_entry's e_refcnt leak in ext4_xattr_block_cache_find()
  jbd2: remove redundant assignement to variable err
  ext4: remove the redundant folio_wait_stable()
  ext4: fix potential unnitialized variable
  ext4: convert ac_buddy_page to ac_buddy_folio
  ext4: convert ac_bitmap_page to ac_bitmap_folio
  ext4: convert ext4_mb_init_cache() to take a folio
  ext4: convert bd_buddy_page to bd_buddy_folio
  ext4: convert bd_bitmap_page to bd_bitmap_folio
  ext4: open coding repeated check in next_linear_group
  ext4: use correct criteria name instead stale integer number in comment
  ext4: call ext4_mb_mark_free_simple to free continuous bits in found chunk
  ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used
  ext4: keep "prefetch_grp" and "nr" consistent
  ...
This commit is contained in:
Linus Torvalds 2024-05-18 14:11:54 -07:00
commit 7991c92f4c
19 changed files with 485 additions and 367 deletions

View File

@ -68,11 +68,6 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
/* usually, the umask is applied by posix_acl_create(), but if
ext4 ACL support is disabled at compile time, we need to do
it here, because posix_acl_create() will never be called */
inode->i_mode &= ~current_umask();
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */

View File

@ -213,11 +213,14 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
/* Large fragment size list lookup succeeded at least once for cr = 0 */
/* Large fragment size list lookup succeeded at least once for
* CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
struct ext4_allocation_request {

View File

@ -3402,9 +3402,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
int allocated = 0, max_zeroout = 0;
int err = 0;
int split_flag = EXT4_EXT_DATA_VALID2;
int allocated = 0;
unsigned int max_zeroout = 0;
ext_debug(inode, "logical block %llu, max_blocks %u\n",
(unsigned long long)map->m_lblk, map_len);

View File

@ -844,8 +844,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (err)
goto out_journal;
lock_buffer(sbi->s_sbh);
strncpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
strtomem_pad(sbi->s_es->s_last_mounted, cp, 0);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
@ -885,7 +884,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}

View File

@ -1865,7 +1865,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
len = folio_size(folio);
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
len = size & (len - 1);
err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
if (!err)
mpd->wbc->nr_to_write--;
@ -2334,7 +2334,7 @@ static int mpage_journal_page_buffers(handle_t *handle,
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(inode))
len = size - folio_pos(folio);
len = size & (len - 1);
return ext4_journal_folio_buffers(handle, folio, len);
}
@ -2887,9 +2887,6 @@ retry:
if (IS_ERR(folio))
return PTR_ERR(folio);
/* In case writeback began while the folio was unlocked */
folio_wait_stable(folio);
#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
@ -3530,7 +3527,6 @@ static const struct address_space_operations ext4_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@ -3547,7 +3543,6 @@ static const struct address_space_operations ext4_journalled_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@ -3564,7 +3559,6 @@ static const struct address_space_operations ext4_da_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@ -3573,7 +3567,6 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
.direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
.bmap = ext4_bmap,
.swap_activate = ext4_iomap_swap_activate,

View File

@ -1150,9 +1150,8 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
*/
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
memset(label, 0, sizeof(label));
lock_buffer(sbi->s_sbh);
strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
strscpy_pad(label, sbi->s_es->s_volume_name);
unlock_buffer(sbi->s_sbh);
if (copy_to_user(user_label, label, sizeof(label)))

View File

@ -30,7 +30,31 @@ struct mbt_ext4_super_block {
#define MBT_CTX(_sb) (&MBT_SB(_sb)->mbt_ctx)
#define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group])
static struct inode *mbt_alloc_inode(struct super_block *sb)
{
struct ext4_inode_info *ei;
ei = kmalloc(sizeof(struct ext4_inode_info), GFP_KERNEL);
if (!ei)
return NULL;
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
return &ei->vfs_inode;
}
static void mbt_free_inode(struct inode *inode)
{
kfree(EXT4_I(inode));
}
static const struct super_operations mbt_sops = {
.alloc_inode = mbt_alloc_inode,
.free_inode = mbt_free_inode,
};
static void mbt_kill_sb(struct super_block *sb)
@ -859,6 +883,56 @@ static void test_mb_free_blocks(struct kunit *test)
ext4_mb_unload_buddy(&e4b);
}
#define COUNT_FOR_ESTIMATE 100000
static void test_mb_mark_used_cost(struct kunit *test)
{
struct ext4_buddy e4b;
struct super_block *sb = (struct super_block *)test->priv;
struct ext4_free_extent ex;
int ret;
struct test_range ranges[TEST_RANGE_COUNT];
int i, j;
unsigned long start, end, all = 0;
/* buddy cache assumes that each page contains at least one block */
if (sb->s_blocksize > PAGE_SIZE)
kunit_skip(test, "blocksize exceeds pagesize");
ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_group = TEST_GOAL_GROUP;
for (j = 0; j < COUNT_FOR_ESTIMATE; j++) {
mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
start = jiffies;
for (i = 0; i < TEST_RANGE_COUNT; i++) {
if (ranges[i].len == 0)
continue;
ex.fe_start = ranges[i].start;
ex.fe_len = ranges[i].len;
ext4_lock_group(sb, TEST_GOAL_GROUP);
mb_mark_used(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
end = jiffies;
all += (end - start);
for (i = 0; i < TEST_RANGE_COUNT; i++) {
if (ranges[i].len == 0)
continue;
ext4_lock_group(sb, TEST_GOAL_GROUP);
mb_free_blocks(NULL, &e4b, ranges[i].start,
ranges[i].len);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
}
kunit_info(test, "costed jiffies %lu\n", all);
ext4_mb_unload_buddy(&e4b);
}
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
{
.blocksize_bits = 10,
@ -901,6 +975,8 @@ static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
{ .speed = KUNIT_SPEED_SLOW }),
{}
};

View File

@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
return 0;
if (order == MB_NUM_ORDERS(sb))
order--;
if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
order = MB_NUM_ORDERS(sb) - 1;
return order;
}
@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* goal length.
*/
order = fls(ac->ac_g_ex.fe_len) - 1;
if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
order = MB_NUM_ORDERS(ac->ac_sb);
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;
@ -1076,23 +1080,11 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
}
/*
* Return next linear group for allocation. If linear traversal should not be
* performed, this function just returns the same group
* Return next linear group for allocation.
*/
static ext4_group_t
next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
ext4_group_t ngroups)
next_linear_group(ext4_group_t group, ext4_group_t ngroups)
{
if (!should_optimize_scan(ac))
goto inc_and_return;
if (ac->ac_groups_linear_remaining) {
ac->ac_groups_linear_remaining--;
goto inc_and_return;
}
return group;
inc_and_return:
/*
* Artificially restricted ngroups for non-extent
* files makes group > ngroups possible on first loop.
@ -1118,8 +1110,19 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;
if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
*group = next_linear_group(ac, *group, ngroups);
if (!should_optimize_scan(ac)) {
*group = next_linear_group(*group, ngroups);
return;
}
/*
* Optimized scanning can return non adjacent groups which can cause
* seek overhead for rotational disks. So try few linear groups before
* trying optimized scan.
*/
if (ac->ac_groups_linear_remaining) {
*group = next_linear_group(*group, ngroups);
ac->ac_groups_linear_remaining--;
return;
}
@ -1131,8 +1134,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
* TODO: For CR=2, we can arrange groups in an rb tree sorted by
* bb_free. But until that happens, we should never come here.
* TODO: For CR_GOAL_LEN_SLOW, we can arrange groups in an
* rb tree sorted by bb_free. But until that happens, we should
* never come here.
*/
WARN_ON(1);
}
@ -1270,7 +1274,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
unsigned int blocksize;
@ -1288,13 +1292,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
char *bitmap;
struct ext4_group_info *grinfo;
inode = page->mapping->host;
inode = folio->mapping->host;
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
blocksize = i_blocksize(inode);
blocks_per_page = PAGE_SIZE / blocksize;
mb_debug(sb, "init page %lu\n", page->index);
mb_debug(sb, "init folio %lu\n", folio->index);
groups_per_page = blocks_per_page >> 1;
if (groups_per_page == 0)
@ -1309,9 +1313,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else
bh = &bhs;
first_group = page->index * blocks_per_page / 2;
first_group = folio->index * blocks_per_page / 2;
/* read all groups the page covers into the cache */
/* read all groups the folio covers into the cache */
for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
if (group >= ngroups)
break;
@ -1322,10 +1326,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
* we must skip all initialized uptodate buddies on the page,
* we must skip all initialized uptodate buddies on the folio,
* which may be currently in use by an allocating task.
*/
if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
if (folio_test_uptodate(folio) &&
!EXT4_MB_GRP_NEED_INIT(grinfo)) {
bh[i] = NULL;
continue;
}
@ -1349,7 +1354,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
err = err2;
}
first_block = page->index * blocks_per_page;
first_block = folio->index * blocks_per_page;
for (i = 0; i < blocks_per_page; i++) {
group = (first_block + i) >> 1;
if (group >= ngroups)
@ -1370,7 +1375,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* above
*
*/
data = page_address(page) + (i * blocksize);
data = folio_address(folio) + (i * blocksize);
bitmap = bh[group - first_group]->b_data;
/*
@ -1385,8 +1390,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
mb_debug(sb, "put buddy for group %u in folio %lu/%x\n",
group, folio->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
@ -1404,8 +1409,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else {
/* this is block of bitmap */
BUG_ON(incore != NULL);
mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
mb_debug(sb, "put bitmap for group %u in folio %lu/%x\n",
group, folio->index, i * blocksize);
trace_ext4_mb_bitmap_load(sb, group);
/* see comments in ext4_mb_put_pa() */
@ -1423,7 +1428,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
incore = data;
}
}
SetPageUptodate(page);
folio_mark_uptodate(folio);
out:
if (bh) {
@ -1439,7 +1444,7 @@ out:
* Lock the buddy and bitmap pages. This make sure other parallel init_group
* on the same buddy page doesn't happen whild holding the buddy page lock.
* Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
* are on the same page e4b->bd_buddy_page is NULL and return value is 0.
* are on the same page e4b->bd_buddy_folio is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
@ -1447,10 +1452,10 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
int blocks_per_page;
struct page *page;
struct folio *folio;
e4b->bd_buddy_page = NULL;
e4b->bd_bitmap_page = NULL;
e4b->bd_buddy_folio = NULL;
e4b->bd_bitmap_folio = NULL;
blocks_per_page = PAGE_SIZE / sb->s_blocksize;
/*
@ -1461,12 +1466,13 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
e4b->bd_bitmap_page = page;
e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
folio = __filemap_get_folio(inode->i_mapping, pnum,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
if (IS_ERR(folio))
return PTR_ERR(folio);
BUG_ON(folio->mapping != inode->i_mapping);
e4b->bd_bitmap_folio = folio;
e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
if (blocks_per_page >= 2) {
/* buddy and bitmap are on the same page */
@ -1474,23 +1480,24 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
}
/* blocks_per_page == 1, hence we need another page for the buddy */
page = find_or_create_page(inode->i_mapping, block + 1, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
e4b->bd_buddy_page = page;
folio = __filemap_get_folio(inode->i_mapping, block + 1,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
if (IS_ERR(folio))
return PTR_ERR(folio);
BUG_ON(folio->mapping != inode->i_mapping);
e4b->bd_buddy_folio = folio;
return 0;
}
static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page) {
unlock_page(e4b->bd_bitmap_page);
put_page(e4b->bd_bitmap_page);
if (e4b->bd_bitmap_folio) {
folio_unlock(e4b->bd_bitmap_folio);
folio_put(e4b->bd_bitmap_folio);
}
if (e4b->bd_buddy_page) {
unlock_page(e4b->bd_buddy_page);
put_page(e4b->bd_buddy_page);
if (e4b->bd_buddy_folio) {
folio_unlock(e4b->bd_buddy_folio);
folio_put(e4b->bd_buddy_folio);
}
}
@ -1505,7 +1512,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
struct ext4_group_info *this_grp;
struct ext4_buddy e4b;
struct page *page;
struct folio *folio;
int ret = 0;
might_sleep();
@ -1532,16 +1539,16 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
page = e4b.bd_bitmap_page;
ret = ext4_mb_init_cache(page, NULL, gfp);
folio = e4b.bd_bitmap_folio;
ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
if (e4b.bd_buddy_page == NULL) {
if (e4b.bd_buddy_folio == NULL) {
/*
* If both the bitmap and buddy are in
* the same page we don't need to force
@ -1551,11 +1558,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
/* init buddy cache */
page = e4b.bd_buddy_page;
ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
folio = e4b.bd_buddy_folio;
ret = ext4_mb_init_cache(folio, e4b.bd_bitmap, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
@ -1577,7 +1584,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
int block;
int pnum;
int poff;
struct page *page;
struct folio *folio;
int ret;
struct ext4_group_info *grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@ -1595,8 +1602,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
e4b->bd_info = grp;
e4b->bd_sb = sb;
e4b->bd_group = group;
e4b->bd_buddy_page = NULL;
e4b->bd_bitmap_page = NULL;
e4b->bd_buddy_folio = NULL;
e4b->bd_bitmap_folio = NULL;
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
/*
@ -1617,102 +1624,103 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
/* we could use find_or_create_page(), but it locks page
* what we'd like to avoid in fast path ... */
page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
if (page == NULL || !PageUptodate(page)) {
if (page)
/* Avoid locking the folio in the fast path ... */
folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
if (!IS_ERR(folio))
/*
* drop the page reference and try
* to get the page with lock. If we
* drop the folio reference and try
* to get the folio with lock. If we
* are not uptodate that implies
* somebody just created the page but
* is yet to initialize the same. So
* somebody just created the folio but
* is yet to initialize it. So
* wait for it to initialize.
*/
put_page(page);
page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
"ext4: bitmap's paging->mapping != inode->i_mapping\n")) {
folio_put(folio);
folio = __filemap_get_folio(inode->i_mapping, pnum,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
if (!IS_ERR(folio)) {
if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
"ext4: bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
unlock_page(page);
folio_unlock(folio);
ret = -EINVAL;
goto err;
}
if (!PageUptodate(page)) {
ret = ext4_mb_init_cache(page, NULL, gfp);
if (!folio_test_uptodate(folio)) {
ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret) {
unlock_page(page);
folio_unlock(folio);
goto err;
}
mb_cmp_bitmaps(e4b, page_address(page) +
mb_cmp_bitmaps(e4b, folio_address(folio) +
(poff * sb->s_blocksize));
}
unlock_page(page);
folio_unlock(folio);
}
}
if (page == NULL) {
ret = -ENOMEM;
if (IS_ERR(folio)) {
ret = PTR_ERR(folio);
goto err;
}
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
/* Pages marked accessed already */
e4b->bd_bitmap_page = page;
e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
/* Folios marked accessed already */
e4b->bd_bitmap_folio = folio;
e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
if (page == NULL || !PageUptodate(page)) {
if (page)
put_page(page);
page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
"ext4: buddy bitmap's page->mapping != inode->i_mapping\n")) {
folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
if (!IS_ERR(folio))
folio_put(folio);
folio = __filemap_get_folio(inode->i_mapping, pnum,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
if (!IS_ERR(folio)) {
if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
"ext4: buddy bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
unlock_page(page);
folio_unlock(folio);
ret = -EINVAL;
goto err;
}
if (!PageUptodate(page)) {
ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
if (!folio_test_uptodate(folio)) {
ret = ext4_mb_init_cache(folio, e4b->bd_bitmap,
gfp);
if (ret) {
unlock_page(page);
folio_unlock(folio);
goto err;
}
}
unlock_page(page);
folio_unlock(folio);
}
}
if (page == NULL) {
ret = -ENOMEM;
if (IS_ERR(folio)) {
ret = PTR_ERR(folio);
goto err;
}
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
/* Pages marked accessed already */
e4b->bd_buddy_page = page;
e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
/* Folios marked accessed already */
e4b->bd_buddy_folio = folio;
e4b->bd_buddy = folio_address(folio) + (poff * sb->s_blocksize);
return 0;
err:
if (page)
put_page(page);
if (e4b->bd_bitmap_page)
put_page(e4b->bd_bitmap_page);
if (!IS_ERR_OR_NULL(folio))
folio_put(folio);
if (e4b->bd_bitmap_folio)
folio_put(e4b->bd_bitmap_folio);
e4b->bd_buddy = NULL;
e4b->bd_bitmap = NULL;
@ -1727,10 +1735,10 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page)
put_page(e4b->bd_bitmap_page);
if (e4b->bd_buddy_page)
put_page(e4b->bd_buddy_page);
if (e4b->bd_bitmap_folio)
folio_put(e4b->bd_bitmap_folio);
if (e4b->bd_buddy_folio)
folio_put(e4b->bd_buddy_folio);
}
@ -2040,13 +2048,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
int ord;
int mlen = 0;
int max = 0;
int cur;
int start = ex->fe_start;
int len = ex->fe_len;
unsigned ret = 0;
int len0 = len;
void *buddy;
bool split = false;
int ord_start, ord_end;
BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
BUG_ON(e4b->bd_group != ex->fe_group);
@ -2071,16 +2078,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
/* let's maintain buddy itself */
while (len) {
if (!split)
ord = mb_find_order_for_block(e4b, start);
ord = mb_find_order_for_block(e4b, start);
if (((start >> ord) << ord) == start && len >= (1 << ord)) {
/* the whole chunk may be allocated at once! */
mlen = 1 << ord;
if (!split)
buddy = mb_find_buddy(e4b, ord, &max);
else
split = false;
buddy = mb_find_buddy(e4b, ord, &max);
BUG_ON((start >> ord) >= max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
@ -2094,20 +2097,29 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
if (ret == 0)
ret = len | (ord << 16);
/* we have to split large buddy */
BUG_ON(ord <= 0);
buddy = mb_find_buddy(e4b, ord, &max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
ord--;
cur = (start >> ord) & ~1U;
buddy = mb_find_buddy(e4b, ord, &max);
mb_clear_bit(cur, buddy);
mb_clear_bit(cur + 1, buddy);
e4b->bd_info->bb_counters[ord]++;
e4b->bd_info->bb_counters[ord]++;
split = true;
ord_start = (start >> ord) << ord;
ord_end = ord_start + (1 << ord);
/* first chunk */
if (start > ord_start)
ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
ord_start, start - ord_start,
e4b->bd_info);
/* last chunk */
if (start + len < ord_end) {
ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
start + len,
ord_end - (start + len),
e4b->bd_info);
break;
}
len = start + len - ord_end;
start = ord_end;
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
@ -2149,10 +2161,10 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
* double allocate blocks. The reference is dropped
* in ext4_mb_release_context
*/
ac->ac_bitmap_page = e4b->bd_bitmap_page;
get_page(ac->ac_bitmap_page);
ac->ac_buddy_page = e4b->bd_buddy_page;
get_page(ac->ac_buddy_page);
ac->ac_bitmap_folio = e4b->bd_bitmap_folio;
folio_get(ac->ac_bitmap_folio);
ac->ac_buddy_folio = e4b->bd_buddy_folio;
folio_get(ac->ac_buddy_folio);
/* store last allocated for subsequent stream allocation */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
spin_lock(&sbi->s_md_lock);
@ -2675,7 +2687,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
int ret;
/*
* cr=CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
* CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
* search to find large good chunks almost for free. If buddy
* data is not ready, then this optimization makes no sense. But
* we never skip the first block group in a flex_bg, since this
@ -2856,6 +2868,7 @@ repeat:
group = ac->ac_g_ex.fe_group;
ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
prefetch_grp = group;
nr = 0;
for (i = 0, new_cr = cr; i < ngroups; i++,
ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
@ -3186,7 +3199,6 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
}
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
@ -3440,10 +3452,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
}
if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
sbi->s_mb_prefetch = ext4_get_groups_count(sb);
/* now many real IOs to prefetch within a single allocation at cr=0
* given cr=0 is an CPU-related optimization we shouldn't try to
* load too many groups, at some point we should start to use what
* we've got in memory.
/*
* now many real IOs to prefetch within a single allocation at
* CR_POWER2_ALIGNED. Given CR_POWER2_ALIGNED is an CPU-related
* optimization we shouldn't try to load too many groups, at some point
* we should start to use what we've got in memory.
* with an average random access time 5ms, it'd take a second to get
* 200 groups (* N with flex_bg), so let's make this limit 4
*/
@ -3884,8 +3897,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
/* No more items in the per group rb tree
* balance refcounts from ext4_mb_free_metadata()
*/
put_page(e4b.bd_buddy_page);
put_page(e4b.bd_bitmap_page);
folio_put(e4b.bd_buddy_folio);
folio_put(e4b.bd_bitmap_folio);
}
ext4_unlock_group(sb, entry->efd_group);
ext4_mb_unload_buddy(&e4b);
@ -5989,10 +6002,10 @@ static void ext4_mb_release_context(struct ext4_allocation_context *ac)
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
put_page(ac->ac_bitmap_page);
if (ac->ac_buddy_page)
put_page(ac->ac_buddy_page);
if (ac->ac_bitmap_folio)
folio_put(ac->ac_bitmap_folio);
if (ac->ac_buddy_folio)
folio_put(ac->ac_buddy_folio);
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
@ -6113,6 +6126,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp)
ext4_mb_mark_bb(sb, block, 1, true);
ar->len = 1;
*errp = 0;
return block;
}
@ -6307,8 +6321,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct rb_node *parent = NULL, *new_node;
BUG_ON(!ext4_handle_valid(handle));
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);
BUG_ON(e4b->bd_bitmap_folio == NULL);
BUG_ON(e4b->bd_buddy_folio == NULL);
new_node = &new_entry->efd_node;
cluster = new_entry->efd_start_cluster;
@ -6319,8 +6333,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* otherwise we'll refresh it from
* on-disk bitmap and lose not-yet-available
* blocks */
get_page(e4b->bd_buddy_page);
get_page(e4b->bd_bitmap_page);
folio_get(e4b->bd_buddy_folio);
folio_get(e4b->bd_bitmap_folio);
}
while (*n) {
parent = *n;

View File

@ -187,14 +187,14 @@ struct ext4_allocation_context {
struct ext4_free_extent ac_f_ex;
/*
* goal len can change in CR1.5, so save the original len. This is
* used while adjusting the PA window and for accounting.
* goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
* This is used while adjusting the PA window and for accounting.
*/
ext4_grpblk_t ac_orig_goal_len;
__u32 ac_flags; /* allocation hints */
__u32 ac_groups_linear_remaining;
__u16 ac_groups_scanned;
__u16 ac_groups_linear_remaining;
__u16 ac_found;
__u16 ac_cX_found[EXT4_MB_NUM_CRS];
__u16 ac_tail;
@ -204,8 +204,8 @@ struct ext4_allocation_context {
__u8 ac_2order; /* if request is to allocate 2^N blocks and
* N > 0, the field stores N, otherwise 0 */
__u8 ac_op; /* operation, for history only */
struct page *ac_bitmap_page;
struct page *ac_buddy_page;
struct folio *ac_bitmap_folio;
struct folio *ac_buddy_folio;
struct ext4_prealloc_space *ac_pa;
struct ext4_locality_group *ac_lg;
};
@ -215,9 +215,9 @@ struct ext4_allocation_context {
#define AC_STATUS_BREAK 3
struct ext4_buddy {
struct page *bd_buddy_page;
struct folio *bd_buddy_folio;
void *bd_buddy;
struct page *bd_bitmap_page;
struct folio *bd_bitmap_folio;
void *bd_bitmap;
struct ext4_group_info *bd_info;
struct super_block *bd_sb;

View File

@ -199,10 +199,8 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0);
if (err) {
folio_set_error(folio);
if (err)
return err;
}
if (!buffer_mapped(bh)) {
folio_zero_range(folio, block_start, blocksize);
set_buffer_uptodate(bh);

View File

@ -2897,7 +2897,7 @@ retry:
inode = ext4_new_inode_start_handle(idmap, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) +
4 + EXT4_XATTR_TRANS_BLOCKS);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);

View File

@ -117,7 +117,6 @@ static void ext4_finish_bio(struct bio *bio)
if (bio->bi_status) {
int err = blk_status_to_errno(bio->bi_status);
folio_set_error(folio);
mapping_set_error(folio->mapping, err);
}
bh = head = folio_buffers(folio);
@ -441,8 +440,6 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
folio_clear_error(folio);
/*
* Comments copied from block_write_full_folio:
*

View File

@ -289,7 +289,6 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
set_error_page:
folio_set_error(folio);
folio_zero_segment(folio, 0,
folio_size(folio));
folio_unlock(folio);

View File

@ -2074,8 +2074,7 @@ static int unnote_qf_name(struct fs_context *fc, int qtype)
{
struct ext4_fs_context *ctx = fc->fs_private;
if (ctx->s_qf_names[qtype])
kfree(ctx->s_qf_names[qtype]);
kfree(ctx->s_qf_names[qtype]);
ctx->s_qf_names[qtype] = NULL;
ctx->qname_spec |= 1 << qtype;
@ -2480,8 +2479,7 @@ static int parse_options(struct fs_context *fc, char *options)
param.size = v_len;
ret = ext4_parse_param(fc, &param);
if (param.string)
kfree(param.string);
kfree(param.string);
if (ret < 0)
return ret;
}
@ -5338,6 +5336,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
super_set_sysfs_name_bdev(sb);
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
@ -5547,19 +5546,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount6;
err = ext4_register_sysfs(sb);
if (err)
goto failed_mount7;
err = ext4_init_orphan_info(sb);
if (err)
goto failed_mount8;
goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount9;
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
@ -5585,7 +5580,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
goto failed_mount10;
goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
@ -5602,15 +5597,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
atomic_set(&sbi->s_warning_count, 0);
atomic_set(&sbi->s_msg_count, 0);
/* Register sysfs after all initializations are complete. */
err = ext4_register_sysfs(sb);
if (err)
goto failed_mount9;
return 0;
failed_mount10:
failed_mount9:
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
failed_mount9: __maybe_unused
failed_mount8: __maybe_unused
ext4_release_orphan_info(sb);
failed_mount8:
ext4_unregister_sysfs(sb);
kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@ -6126,8 +6123,8 @@ static void ext4_update_super(struct super_block *sb)
__ext4_update_tstamp(&es->s_first_error_time,
&es->s_first_error_time_hi,
sbi->s_first_error_time);
strncpy(es->s_first_error_func, sbi->s_first_error_func,
sizeof(es->s_first_error_func));
strtomem_pad(es->s_first_error_func,
sbi->s_first_error_func, 0);
es->s_first_error_line =
cpu_to_le32(sbi->s_first_error_line);
es->s_first_error_ino =
@ -6140,8 +6137,7 @@ static void ext4_update_super(struct super_block *sb)
__ext4_update_tstamp(&es->s_last_error_time,
&es->s_last_error_time_hi,
sbi->s_last_error_time);
strncpy(es->s_last_error_func, sbi->s_last_error_func,
sizeof(es->s_last_error_func));
strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);

View File

@ -29,7 +29,10 @@ typedef enum {
attr_trigger_test_error,
attr_first_error_time,
attr_last_error_time,
attr_clusters_in_group,
attr_mb_order,
attr_feature,
attr_pointer_pi,
attr_pointer_ui,
attr_pointer_ul,
attr_pointer_u64,
@ -104,7 +107,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
if (ret || val >= clusters)
if (ret || val >= clusters || (s64)val < 0)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
@ -178,6 +181,9 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_RO_ATTR_ES_STRING(_name,_elname,_size) \
EXT4_ATTR_STRING(_name, 0444, _size, ext4_super_block, _elname)
#define EXT4_RW_ATTR_SBI_PI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_pi, ext4_sb_info, _elname)
#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname)
@ -207,23 +213,25 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
ext4_sb_info, s_inode_readahead_blks);
EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
ext4_sb_info, s_mb_group_prealloc);
EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
ext4_sb_info, s_mb_best_avail_max_trim_order);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(mb_best_avail_max_trim_order, s_mb_best_avail_max_trim_order);
EXT4_RW_ATTR_SBI_PI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_PI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_PI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
@ -366,13 +374,45 @@ static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
#define print_tstamp(buf, es, tstamp) \
__print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
static ssize_t ext4_generic_attr_show(struct ext4_attr *a,
struct ext4_sb_info *sbi, char *buf)
{
void *ptr = calc_ptr(a, sbi);
if (!ptr)
return 0;
switch (a->attr_id) {
case attr_inode_readahead:
case attr_clusters_in_group:
case attr_mb_order:
case attr_pointer_pi:
case attr_pointer_ui:
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr));
case attr_pointer_ul:
return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr));
case attr_pointer_u8:
return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr));
case attr_pointer_u64:
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr));
return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr));
case attr_pointer_string:
return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr);
case attr_pointer_atomic:
return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr));
}
return 0;
}
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
void *ptr = calc_ptr(a, sbi);
switch (a->attr_id) {
case attr_delayed_allocation_blocks:
@ -391,45 +431,6 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return sysfs_emit(buf, "%llu\n",
(unsigned long long)
percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
case attr_inode_readahead:
case attr_pointer_ui:
if (!ptr)
return 0;
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%u\n",
le32_to_cpup(ptr));
else
return sysfs_emit(buf, "%u\n",
*((unsigned int *) ptr));
case attr_pointer_ul:
if (!ptr)
return 0;
return sysfs_emit(buf, "%lu\n",
*((unsigned long *) ptr));
case attr_pointer_u8:
if (!ptr)
return 0;
return sysfs_emit(buf, "%u\n",
*((unsigned char *) ptr));
case attr_pointer_u64:
if (!ptr)
return 0;
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%llu\n",
le64_to_cpup(ptr));
else
return sysfs_emit(buf, "%llu\n",
*((unsigned long long *) ptr));
case attr_pointer_string:
if (!ptr)
return 0;
return sysfs_emit(buf, "%.*s\n", a->attr_size,
(char *) ptr);
case attr_pointer_atomic:
if (!ptr)
return 0;
return sysfs_emit(buf, "%d\n",
atomic_read((atomic_t *) ptr));
case attr_feature:
return sysfs_emit(buf, "supported\n");
case attr_first_error_time:
@ -438,8 +439,64 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return print_tstamp(buf, sbi->s_es, s_last_error_time);
case attr_journal_task:
return journal_task_show(sbi, buf);
default:
return ext4_generic_attr_show(a, sbi, buf);
}
}
static ssize_t ext4_generic_attr_store(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t len)
{
int ret;
unsigned int t;
unsigned long lt;
void *ptr = calc_ptr(a, sbi);
if (!ptr)
return 0;
switch (a->attr_id) {
case attr_pointer_pi:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if ((int)t < 0)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_pointer_ui:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (a->attr_ptr == ptr_ext4_super_block_offset)
*((__le32 *) ptr) = cpu_to_le32(t);
else
*((unsigned int *) ptr) = t;
return len;
case attr_mb_order:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (t > 64)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_clusters_in_group:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (t > sbi->s_clusters_per_group)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_pointer_ul:
ret = kstrtoul(skip_spaces(buf), 0, &lt);
if (ret)
return ret;
*((unsigned long *) ptr) = lt;
return len;
}
return 0;
}
@ -450,38 +507,17 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
void *ptr = calc_ptr(a, sbi);
unsigned long t;
int ret;
switch (a->attr_id) {
case attr_reserved_clusters:
return reserved_clusters_store(sbi, buf, len);
case attr_pointer_ui:
if (!ptr)
return 0;
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (a->attr_ptr == ptr_ext4_super_block_offset)
*((__le32 *) ptr) = cpu_to_le32(t);
else
*((unsigned int *) ptr) = t;
return len;
case attr_pointer_ul:
if (!ptr)
return 0;
ret = kstrtoul(skip_spaces(buf), 0, &t);
if (ret)
return ret;
*((unsigned long *) ptr) = t;
return len;
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
case attr_trigger_test_error:
return trigger_test_error(sbi, buf, len);
default:
return ext4_generic_attr_store(a, sbi, buf, len);
}
return 0;
}
static void ext4_sb_release(struct kobject *kobj)

View File

@ -1619,6 +1619,7 @@ out_err:
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
struct ext4_xattr_search *s,
handle_t *handle, struct inode *inode,
struct inode *new_ea_inode,
bool is_block)
{
struct ext4_xattr_entry *last, *next;
@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;
@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
old_ea_inode = NULL;
goto out;
}
}
if (i->value && in_inode) {
WARN_ON_ONCE(!i->value_len);
new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(new_ea_inode)) {
ret = PTR_ERR(new_ea_inode);
new_ea_inode = NULL;
goto out;
}
}
if (old_ea_inode) {
/* We are ready to release ref count on the old_ea_inode. */
ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
if (ret) {
/* Release newly required ref count on new_ea_inode. */
if (new_ea_inode) {
int err;
err = ext4_xattr_inode_dec_ref(handle,
new_ea_inode);
if (err)
ext4_warning_inode(new_ea_inode,
"dec ref new_ea_inode err=%d",
err);
ext4_xattr_inode_free_quota(inode, new_ea_inode,
i->value_len);
}
if (ret)
goto out;
}
ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
@ -1866,7 +1839,6 @@ update_hash:
ret = 0;
out:
iput(old_ea_inode);
iput(new_ea_inode);
return ret;
}
@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
size_t old_ea_inode_quota = 0;
unsigned int ea_ino;
#define header(x) ((struct ext4_xattr_header *)(x))
/* If we need EA inode, prepare it before locking the buffer */
if (i->value && i->in_inode) {
WARN_ON_ONCE(!i->value_len);
ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(ea_inode)) {
error = PTR_ERR(ea_inode);
ea_inode = NULL;
goto cleanup;
}
}
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;
@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_JTR_NONE);
if (error)
goto cleanup;
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@ -2034,33 +2019,22 @@ clone_block:
s->end = s->base + sb->s_blocksize;
}
error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;
if (i->value && s->here->e_value_inum) {
/*
* A ref count on ea_inode has been taken as part of the call to
* ext4_xattr_set_entry() above. We would like to drop this
* extra ref but we have to wait until the xattr block is
* initialized and has its own ref count on the ea_inode.
*/
ea_ino = le32_to_cpu(s->here->e_value_inum);
error = ext4_xattr_inode_iget(inode, ea_ino,
le32_to_cpu(s->here->e_hash),
&ea_inode);
if (error) {
ea_inode = NULL;
goto cleanup;
}
}
inserted:
if (!IS_LAST_ENTRY(s->first)) {
new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
&ce);
new_bh = ext4_xattr_block_cache_find(inode, header(s->base), &ce);
if (IS_ERR(new_bh)) {
error = PTR_ERR(new_bh);
new_bh = NULL;
goto cleanup;
}
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
@ -2158,6 +2132,17 @@ getblk_failed:
ENTRY(header(s->base)+1));
if (error)
goto getblk_failed;
if (ea_inode) {
/* Drop the extra ref on ea_inode. */
error = ext4_xattr_inode_dec_ref(handle,
ea_inode);
if (error)
ext4_warning_inode(ea_inode,
"dec ref error=%d",
error);
iput(ea_inode);
ea_inode = NULL;
}
lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, sb,
@ -2198,17 +2183,16 @@ getblk_failed:
cleanup:
if (ea_inode) {
int error2;
if (error) {
int error2;
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
/* If there was an error, revert the quota charge. */
if (error)
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
}
iput(ea_inode);
}
if (ce)
@ -2266,14 +2250,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_search *s = &is->s;
struct inode *ea_inode = NULL;
int error;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;
error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error)
/* If we need EA inode, prepare it before locking the buffer */
if (i->value && i->in_inode) {
WARN_ON_ONCE(!i->value_len);
ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(ea_inode))
return PTR_ERR(ea_inode);
}
error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
false /* is_block */);
if (error) {
if (ea_inode) {
int error2;
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
iput(ea_inode);
}
return error;
}
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
@ -2282,6 +2290,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header->h_magic = cpu_to_le32(0);
ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
}
iput(ea_inode);
return 0;
}
@ -3090,8 +3099,8 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*
* Find an identical extended attribute block.
*
* Returns a pointer to the block found, or NULL if such a block was
* not found or an error occurred.
* Returns a pointer to the block found, or NULL if such a block was not
* found, or an error pointer if an error occurred while reading ea block.
*/
static struct buffer_head *
ext4_xattr_block_cache_find(struct inode *inode,
@ -3113,11 +3122,11 @@ ext4_xattr_block_cache_find(struct inode *inode,
bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
if (PTR_ERR(bh) != -ENOMEM)
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
mb_cache_entry_put(ea_block_cache, ce);
return bh;
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;

View File

@ -337,8 +337,6 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
/*
* journal_shrink_one_cp_list
*
@ -350,7 +348,7 @@ enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
enum shrink_type type,
enum jbd2_shrink_type type,
bool *released)
{
struct journal_head *last_jh;
@ -367,12 +365,12 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
if (type == SHRINK_DESTROY) {
if (type == JBD2_SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0) {
if (type == SHRINK_BUSY_SKIP)
if (type == JBD2_SHRINK_BUSY_SKIP)
continue;
break;
}
@ -439,7 +437,7 @@ again:
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
SHRINK_BUSY_SKIP, &released);
JBD2_SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
@ -472,21 +470,25 @@ out:
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
* If 'destroy' is set, release all buffers unconditionally.
* If 'type' is JBD2_SHRINK_DESTROY, release all buffers unconditionally. If
* 'type' is JBD2_SHRINK_BUSY_STOP, will stop release buffers if encounters a
* busy buffer. To avoid wasting CPU cycles scanning the buffer list in some
* cases, don't pass JBD2_SHRINK_BUSY_SKIP 'type' for this function.
*
* Called with j_list_lock held.
*/
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
void __jbd2_journal_clean_checkpoint_list(journal_t *journal,
enum jbd2_shrink_type type)
{
transaction_t *transaction, *last_transaction, *next_transaction;
enum shrink_type type;
bool released;
WARN_ON_ONCE(type == JBD2_SHRINK_BUSY_SKIP);
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
@ -527,7 +529,7 @@ void jbd2_journal_destroy_checkpoint(journal_t *journal)
spin_unlock(&journal->j_list_lock);
break;
}
__jbd2_journal_clean_checkpoint_list(journal, true);
__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_DESTROY);
spin_unlock(&journal->j_list_lock);
cond_resched();
}

View File

@ -501,7 +501,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
__jbd2_journal_clean_checkpoint_list(journal, false);
__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
spin_unlock(&journal->j_list_lock);
jbd2_debug(3, "JBD2: commit phase 1\n");
@ -571,7 +571,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_nr_buffers <=
atomic_read(&commit_transaction->t_outstanding_credits));
err = 0;
bufs = 0;
descriptor = NULL;
while (commit_transaction->t_buffers) {

View File

@ -1434,7 +1434,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
enum jbd2_shrink_type {JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP, JBD2_SHRINK_BUSY_SKIP};
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, enum jbd2_shrink_type type);
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);