Miscellaneous bug fixes and cleanups in ext4's multi-block allocator

and extent handling code.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmW/G4YACgkQ8vlZVpUN
 gaPTpwf/c/Fk27GV8ge9PQtR6gmir/lyw2qkvK3Z+12aEsblZRmyvElyZWjAuNQG
 bciQyltabIPOA4XxfsZOrdgYI42n0rTTFG7bmI0lr+BJM/HRw0tGGMN91FZla0FP
 EXv/AiHKCqlT5OFZbD+8n1TzfdOgWotjug1VLteXve3YKjkDgt5IQm/0Gx9hKBld
 IR8SrQlD/rYe+VPvaHz5G4u09Ne5pUE5fDj3xE23wxfU5cloVzuVRCSOGWUCTnCW
 T0v6sHeKrmiLC8tIOZkBjer4nXC0MOu0p5+geAjwOArc9VJ1Lh2eAkH+GgDOVprx
 ahdl2qmbIbacBYECIeQ/+1i78+O1yw==
 =CmYr
 -----END PGP SIGNATURE-----

Merge tag 'for-linus-6.8-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Miscellaneous bug fixes and cleanups in ext4's multi-block allocator
  and extent handling code"

* tag 'for-linus-6.8-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (23 commits)
  ext4: make ext4_set_iomap() recognize IOMAP_DELALLOC map type
  ext4: make ext4_map_blocks() distinguish delalloc only extent
  ext4: add a hole extent entry in cache after punch
  ext4: correct the hole length returned by ext4_map_blocks()
  ext4: convert to exclusive lock while inserting delalloc extents
  ext4: refactor ext4_da_map_blocks()
  ext4: remove 'needed' in trace_ext4_discard_preallocations
  ext4: remove unnecessary parameter "needed" in ext4_discard_preallocations
  ext4: remove unused return value of ext4_mb_release_group_pa
  ext4: remove unused return value of ext4_mb_release_inode_pa
  ext4: remove unused return value of ext4_mb_release
  ext4: remove unused ext4_allocation_context::ac_groups_considered
  ext4: remove unneeded return value of ext4_mb_release_context
  ext4: remove unused parameter ngroup in ext4_mb_choose_next_group_*()
  ext4: remove unused return value of __mb_check_buddy
  ext4: mark the group block bitmap as corrupted before reporting an error
  ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal()
  ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found()
  ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt
  ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
  ...
This commit is contained in:
Linus Torvalds 2024-02-04 07:33:01 +00:00
commit 3f24fcdacd
11 changed files with 203 additions and 189 deletions

View file

@ -252,8 +252,10 @@ struct ext4_allocation_request {
#define EXT4_MAP_MAPPED BIT(BH_Mapped)
#define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten)
#define EXT4_MAP_BOUNDARY BIT(BH_Boundary)
#define EXT4_MAP_DELAYED BIT(BH_Delay)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY)
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
EXT4_MAP_DELAYED)
struct ext4_map_blocks {
ext4_fsblk_t m_pblk;
@ -2912,10 +2914,10 @@ extern const struct seq_operations ext4_mb_seq_groups_ops;
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
extern void ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,

View file

@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode,
/*
* ext4_ext_determine_hole - determine hole around given block
* ext4_ext_find_hole - find hole around given block according to the given path
* @inode: inode we lookup in
* @path: path in extent tree to @lblk
* @lblk: pointer to logical block around which we want to determine hole
@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode,
* The function returns the length of a hole starting at @lblk. We update @lblk
* to the beginning of the hole if we managed to find it.
*/
static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *lblk)
static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
struct ext4_extent *ex;
@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
return len;
}
/*
* ext4_ext_put_gap_in_cache:
* calculate boundaries of the gap that the requested block fits into
* and cache this gap
*/
static void
ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
ext4_lblk_t hole_len)
{
struct extent_status es;
ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
hole_start + hole_len - 1, &es);
if (es.es_len) {
/* There's delayed extent containing lblock? */
if (es.es_lblk <= hole_start)
return;
hole_len = min(es.es_lblk - hole_start, hole_len);
}
ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
EXTENT_STATUS_HOLE);
}
/*
* ext4_ext_rm_idx:
* removes index from the index block.
@ -4062,6 +4038,72 @@ static int get_implied_cluster_alloc(struct super_block *sb,
return 0;
}
/*
* Determine hole length around the given logical block, first try to
* locate and expand the hole from the given @path, and then adjust it
* if it's partially or completely converted to delayed extents, insert
* it into the extent cache tree if it's indeed a hole, finally return
* the length of the determined extent.
*/
static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t lblk)
{
ext4_lblk_t hole_start, len;
struct extent_status es;
hole_start = lblk;
len = ext4_ext_find_hole(inode, path, &hole_start);
again:
ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
hole_start + len - 1, &es);
if (!es.es_len)
goto insert_hole;
/*
* There's a delalloc extent in the hole, handle it if the delalloc
* extent is in front of, behind and straddle the queried range.
*/
if (lblk >= es.es_lblk + es.es_len) {
/*
* The delalloc extent is in front of the queried range,
* find again from the queried start block.
*/
len -= lblk - hole_start;
hole_start = lblk;
goto again;
} else if (in_range(lblk, es.es_lblk, es.es_len)) {
/*
* The delalloc extent containing lblk, it must have been
* added after ext4_map_blocks() checked the extent status
* tree so we are not holding i_rwsem and delalloc info is
* only stabilized by i_data_sem we are going to release
* soon. Don't modify the extent status tree and report
* extent as a hole, just adjust the length to the delalloc
* extent's after lblk.
*/
len = es.es_lblk + es.es_len - lblk;
return len;
} else {
/*
* The delalloc extent is partially or completely behind
* the queried range, update hole length until the
* beginning of the delalloc extent.
*/
len = min(es.es_lblk - hole_start, len);
}
insert_hole:
/* Put just found gap into cache to speed up subsequent requests */
ext_debug(inode, " -> %u:%u\n", hole_start, len);
ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
/* Update hole_len to reflect hole size after lblk */
if (hole_start != lblk)
len -= lblk - hole_start;
return len;
}
/*
* Block allocation/map/preallocation routine for extents based files
@ -4179,22 +4221,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
ext4_lblk_t hole_start, hole_len;
ext4_lblk_t len;
hole_start = map->m_lblk;
hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
/*
* put just found gap into cache to speed up
* subsequent requests
*/
ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
/* Update hole_len to reflect hole size after map->m_lblk */
if (hole_start != map->m_lblk)
hole_len -= map->m_lblk - hole_start;
map->m_pblk = 0;
map->m_len = min_t(unsigned int, map->m_len, hole_len);
map->m_len = min_t(unsigned int, map->m_len, len);
goto out;
}
@ -4313,7 +4345,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free().
*/
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
ext4_free_blocks(handle, inode, NULL, newblock,
@ -5357,7 +5389,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
@ -5365,7 +5397,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start, SHIFT_LEFT);
@ -5497,7 +5529,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
goto out_stop;
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
path = ext4_find_extent(inode, offset_lblk, NULL, 0);
if (IS_ERR(path)) {

View file

@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
(atomic_read(&inode->i_writecount) == 1) &&
!EXT4_I(inode)->i_reserved_data_blocks) {
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)

View file

@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
* i_rwsem. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;

View file

@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
*/
if ((ei->i_reserved_data_blocks == 0) &&
!inode_is_open_for_write(inode))
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
}
static int __check_block_validity(struct inode *inode, const char *func,
@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = retval;
} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
map->m_pblk = 0;
map->m_flags |= ext4_es_is_delayed(&es) ?
EXT4_MAP_DELAYED : 0;
retval = es.es_len - (map->m_lblk - es.es_lblk);
if (retval > map->m_len)
retval = map->m_len;
@ -1703,11 +1705,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
if (ext4_es_is_hole(&es)) {
retval = 0;
down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_es_is_hole(&es))
goto add_delayed;
}
/*
* Delayed extent could be allocated by fallocate.
@ -1749,26 +1748,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
add_delayed:
if (retval == 0) {
int ret;
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
ret = ext4_insert_delayed_block(inode, map->m_lblk);
if (ret != 0) {
retval = ret;
goto out_unlock;
}
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
} else if (retval > 0) {
if (retval < 0) {
up_read(&EXT4_I(inode)->i_data_sem);
return retval;
}
if (retval > 0) {
unsigned int status;
if (unlikely(retval != map->m_len)) {
@ -1783,11 +1767,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
up_read(&EXT4_I(inode)->i_data_sem);
return retval;
}
up_read(&EXT4_I(inode)->i_data_sem);
out_unlock:
up_read((&EXT4_I(inode)->i_data_sem));
add_delayed:
down_write(&EXT4_I(inode)->i_data_sem);
retval = ext4_insert_delayed_block(inode, map->m_lblk);
up_write(&EXT4_I(inode)->i_data_sem);
if (retval)
return retval;
map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh);
set_buffer_delay(bh);
return retval;
}
@ -3268,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
iomap->addr = (u64) map->m_pblk << blkbits;
if (flags & IOMAP_DAX)
iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
} else if (map->m_flags & EXT4_MAP_DELAYED) {
iomap->type = IOMAP_DELALLOC;
iomap->addr = IOMAP_NULL_ADDR;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
@ -3430,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = {
.iomap_end = ext4_iomap_end,
};
static bool ext4_iomap_is_delalloc(struct inode *inode,
struct ext4_map_blocks *map)
{
struct extent_status es;
ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
map->m_lblk, end, &es);
if (!es.es_len || es.es_lblk > end)
return false;
if (es.es_lblk > map->m_lblk) {
map->m_len = es.es_lblk - map->m_lblk;
return false;
}
offset = map->m_lblk - es.es_lblk;
map->m_len = es.es_len - offset;
return true;
}
static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
loff_t length, unsigned int flags,
struct iomap *iomap, struct iomap *srcmap)
{
int ret;
bool delalloc = false;
struct ext4_map_blocks map;
u8 blkbits = inode->i_blkbits;
@ -3499,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret < 0)
return ret;
if (ret == 0)
delalloc = ext4_iomap_is_delalloc(inode, &map);
set_iomap:
ext4_set_iomap(inode, iomap, &map, offset, length, flags);
if (delalloc && iomap->type == IOMAP_HOLE)
iomap->type = IOMAP_DELALLOC;
return 0;
}
@ -4015,12 +3983,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
/* If there are blocks to remove, do it */
if (stop_block > first_block) {
ext4_lblk_t hole_len = stop_block - first_block;
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, first_block,
stop_block - first_block);
ext4_es_remove_extent(inode, first_block, hole_len);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_remove_space(inode, first_block,
@ -4029,6 +3997,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
ret = ext4_ind_remove_space(handle, inode, first_block,
stop_block);
ext4_es_insert_extent(inode, first_block, hole_len, ~0,
EXTENT_STATUS_HOLE);
up_write(&EXT4_I(inode)->i_data_sem);
}
ext4_fc_track_range(handle, inode, first_block, stop_block);
@ -4170,7 +4140,7 @@ int ext4_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
err = ext4_ext_truncate(handle, inode);

View file

@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_reset_inode_seed(inode);
ext4_reset_inode_seed(inode_bl);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) {

View file

@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing block already freed "
"(bit %u)",
first + i);
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
}
@ -677,7 +677,7 @@ do { \
} \
} while (0)
static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
static void __mb_check_buddy(struct ext4_buddy *e4b, char *file,
const char *function, int line)
{
struct super_block *sb = e4b->bd_sb;
@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
void *buddy2;
if (e4b->bd_info->bb_check_counter++ % 10)
return 0;
return;
while (order > 1) {
buddy = mb_find_buddy(e4b, order, &max);
@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
grp = ext4_get_group_info(sb, e4b->bd_group);
if (!grp)
return NULL;
return;
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
for (i = 0; i < pa->pa_len; i++)
MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
}
return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
@ -842,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
struct ext4_sb_info *sbi = EXT4_SB(sb);
int new_order;
if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0)
if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0)
return;
new_order = mb_avg_fragment_size_order(sb,
@ -871,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
* cr level needs an update.
*/
static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac,
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *iter;
@ -945,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o
* order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac,
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@ -990,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *
* much and fall to CR_GOAL_LEN_SLOW in that case.
*/
static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac,
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL;
@ -1125,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
}
if (*new_cr == CR_POWER2_ALIGNED) {
ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups);
ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group);
} else if (*new_cr == CR_GOAL_LEN_FAST) {
ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups);
ext4_mb_choose_next_group_goal_fast(ac, new_cr, group);
} else if (*new_cr == CR_BEST_AVAIL_LEN) {
ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups);
ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
* TODO: For CR=2, we can arrange groups in an rb tree sorted by
@ -1233,6 +1232,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
atomic64_add(period, &sbi->s_mb_generation_time);
}
static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
int count;
int order = 1;
void *buddy;
while ((buddy = mb_find_buddy(e4b, order++, &count)))
mb_set_bits(buddy, 0, count);
e4b->bd_info->bb_fragments = 0;
memset(e4b->bd_info->bb_counters, 0,
sizeof(*e4b->bd_info->bb_counters) *
(e4b->bd_sb->s_blocksize_bits + 2));
ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
}
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@ -1891,11 +1908,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
this_cpu_inc(discard_pa_seq);
e4b->bd_info->bb_free += count;
if (first < e4b->bd_info->bb_first_free)
e4b->bd_info->bb_first_free = first;
/* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
@ -1909,21 +1921,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
/*
* Fastcommit replay can free already freed blocks which
* corrupts allocation info. Regenerate it.
*/
if (sbi->s_mount_state & EXT4_FC_REPLAY) {
mb_regenerate_buddy(e4b);
goto check;
}
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing already freed block (bit %u); block bitmap corrupt.",
block);
ext4_mark_group_bitmap_corrupted(
sb, e4b->bd_group,
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
}
goto done;
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0, blocknr,
"freeing already freed block (bit %u); block bitmap corrupt.",
block);
return;
}
this_cpu_inc(discard_pa_seq);
e4b->bd_info->bb_free += count;
if (first < e4b->bd_info->bb_first_free)
e4b->bd_info->bb_first_free = first;
/* let's maintain fragments counter */
if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
@ -1948,9 +1970,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first <= last)
mb_buddy_mark_free(e4b, first >> 1, last >> 1);
done:
mb_set_largest_free_order(sb, e4b->bd_info);
mb_update_avg_fragment_size(sb, e4b->bd_info);
check:
mb_check_buddy(e4b);
}
@ -2276,6 +2298,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return;
ext4_lock_group(ac->ac_sb, group);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
goto out;
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@ -2283,6 +2308,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
}
@ -2309,12 +2335,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
ext4_mb_unload_buddy(e4b);
return 0;
}
ext4_lock_group(ac->ac_sb, group);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
goto out;
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@ -2347,6 +2371,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@ -2380,12 +2405,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
k = mb_find_next_zero_bit(buddy, max, 0);
if (k >= max) {
ext4_mark_group_bitmap_corrupted(ac->ac_sb,
e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0,
"%d free clusters of order %d. But found 0",
grp->bb_counters[i], i);
ext4_mark_group_bitmap_corrupted(ac->ac_sb,
e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
ac->ac_found++;
@ -2436,12 +2461,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* have free blocks
*/
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But bitmap says 0",
free);
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
break;
}
@ -2467,12 +2492,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
if (WARN_ON(ex.fe_len <= 0))
break;
if (free < ex.fe_len) {
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
"group info. But got %d blocks",
free, ex.fe_len);
ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
/*
* The number of free blocks differs. This mostly
* indicate that the bitmap is corrupt. So exit
@ -3725,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp)
return count;
}
int ext4_mb_release(struct super_block *sb)
void ext4_mb_release(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
@ -3801,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb)
}
free_percpu(sbi->s_locality_groups);
return 0;
}
static inline int ext4_issue_discard(struct super_block *sb,
@ -5284,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
* the caller MUST hold group/inode locks.
* TODO: optimize the case when there are no in-core structures yet
*/
static noinline_for_stack int
static noinline_for_stack void
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
struct ext4_prealloc_space *pa)
{
@ -5334,11 +5357,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
*/
}
atomic_add(free, &sbi->s_mb_discarded);
return 0;
}
static noinline_for_stack int
static noinline_for_stack void
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
struct ext4_prealloc_space *pa)
{
@ -5352,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
e4b->bd_group, group, pa->pa_pstart);
return 0;
return;
}
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
/*
@ -5479,7 +5498,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
*
* FIXME!! Make sure it is valid at all the call sites
*/
void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
void ext4_discard_preallocations(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct super_block *sb = inode->i_sb;
@ -5491,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
struct rb_node *iter;
int err;
if (!S_ISREG(inode->i_mode)) {
if (!S_ISREG(inode->i_mode))
return;
}
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return;
@ -5501,15 +5519,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
mb_debug(sb, "discard preallocation for inode %lu\n",
inode->i_ino);
trace_ext4_discard_preallocations(inode,
atomic_read(&ei->i_prealloc_active), needed);
if (needed == 0)
needed = UINT_MAX;
atomic_read(&ei->i_prealloc_active));
repeat:
/* first, collect all pa's in the inode */
write_lock(&ei->i_prealloc_lock);
for (iter = rb_first(&ei->i_prealloc_node); iter && needed;
for (iter = rb_first(&ei->i_prealloc_node); iter;
iter = rb_next(iter)) {
pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
@ -5533,7 +5548,6 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
spin_unlock(&pa->pa_lock);
rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node);
list_add(&pa->u.pa_tmp_list, &list);
needed--;
continue;
}
@ -5943,7 +5957,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/*
* release all resource we used in allocation
*/
static int ext4_mb_release_context(struct ext4_allocation_context *ac)
static void ext4_mb_release_context(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_prealloc_space *pa = ac->ac_pa;
@ -5980,7 +5994,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
return 0;
}
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
@ -6761,6 +6774,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
bool set_trimmed = false;
void *bitmap;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
return 0;
last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
if (start == 0 && max >= last)

View file

@ -192,7 +192,6 @@ struct ext4_allocation_context {
*/
ext4_grpblk_t ac_orig_goal_len;
__u32 ac_groups_considered;
__u32 ac_flags; /* allocation hints */
__u16 ac_groups_scanned;
__u16 ac_groups_linear_remaining;

View file

@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;
*moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
move_extent_per_page(o_filp, donor_inode,
*moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@ -682,14 +683,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
*moved_len = o_start - orig_blk;
if (*moved_len > len)
*moved_len = len;
out:
if (*moved_len) {
ext4_discard_preallocations(orig_inode, 0);
ext4_discard_preallocations(donor_inode, 0);
ext4_discard_preallocations(orig_inode);
ext4_discard_preallocations(donor_inode);
}
ext4_free_ext_path(path);

View file

@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode)
ext4_fc_del(inode);
invalidate_inode_buffers(inode);
clear_inode(inode);
ext4_discard_preallocations(inode, 0);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {

View file

@ -772,15 +772,14 @@ TRACE_EVENT(ext4_mb_release_group_pa,
);
TRACE_EVENT(ext4_discard_preallocations,
TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
TP_PROTO(struct inode *inode, unsigned int len),
TP_ARGS(inode, len, needed),
TP_ARGS(inode, len),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, len )
__field( unsigned int, needed )
),
@ -788,13 +787,11 @@ TRACE_EVENT(ext4_discard_preallocations,
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->len = len;
__entry->needed = needed;
),
TP_printk("dev %d,%d ino %lu len: %u needed %u",
TP_printk("dev %d,%d ino %lu len: %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->len,
__entry->needed)
(unsigned long) __entry->ino, __entry->len)
);
TRACE_EVENT(ext4_mb_discard_preallocations,