ext4: refactor ext4_move_extents code base

ext4_move_extents is too complex for review. It has duplicate almost
each function available in the rest of other codebase. It has useless
artificial restriction orig_offset == donor_offset. But in fact logic
of ext4_move_extents is very simple:

Iterate extents one by one (similar to ext4_fill_fiemap_extents)
   ->Iterate each page covered extent (similar to generic_perform_write)
     ->swap extents for covered by page (can be shared with IOC_MOVE_DATA)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
Dmitry Monakhov 2014-08-30 23:52:19 -04:00 committed by Theodore Ts'o
parent f8fb4f4150
commit fcf6b1b729
3 changed files with 337 additions and 890 deletions

View file

@ -2740,10 +2740,15 @@ extern int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1,
ext4_lblk_t lblk2, ext4_lblk_t count,
int mark_unwritten,int *err);
/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,

View file

@ -291,6 +291,19 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
return size;
}
static inline int
ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path, ext4_lblk_t lblk,
int nofail)
{
int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
return ext4_split_extent_at(handle, inode, path, lblk, unwritten ?
EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
}
/*
* Calculate the number of metadata blocks needed
* to allocate @blocks
@ -1559,7 +1572,7 @@ static int ext4_ext_search_right(struct inode *inode,
* allocated block. Thus, index entries have to be consistent
* with leaves.
*/
static ext4_lblk_t
ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
int depth;
@ -2854,24 +2867,14 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
*/
if (end >= ee_block &&
end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
int split_flag = 0;
if (ext4_ext_is_unwritten(ex))
split_flag = EXT4_EXT_MARK_UNWRIT1 |
EXT4_EXT_MARK_UNWRIT2;
/*
* Split the extent in two so that 'end' is the last
* block in the first new extent. Also we should not
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
err = ext4_split_extent_at(handle, inode, path,
end + 1, split_flag,
EXT4_EX_NOCACHE |
EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
err = ext4_force_split_extent_at(handle, inode, path,
end + 1, 1);
if (err < 0)
goto out;
}
@ -5506,3 +5509,208 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
mutex_unlock(&inode->i_mutex);
return ret;
}
/**
* ext4_swap_extents - Swap extents between two inodes
*
* @inode1: First inode
* @inode2: Second inode
* @lblk1: Start block for first inode
* @lblk2: Start block for second inode
* @count: Number of blocks to swap
* @mark_unwritten: Mark second inode's extents as unwritten after swap
* @erp: Pointer to save error value
*
* This helper routine does exactly what is promise "swap extents". All other
* stuff such as page-cache locking consistency, bh mapping consistency or
* extent's data copying must be performed by caller.
* Locking:
* i_mutex is held for both inodes
* i_data_sem is locked for write for both inodes
* Assumptions:
* All pages from requested range are locked for both inodes
*/
int
ext4_swap_extents(handle_t *handle, struct inode *inode1,
struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
ext4_lblk_t count, int unwritten, int *erp)
{
struct ext4_ext_path *path1 = NULL;
struct ext4_ext_path *path2 = NULL;
int replaced_count = 0;
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
BUG_ON(!mutex_is_locked(&inode1->i_mutex));
*erp = ext4_es_remove_extent(inode1, lblk1, count);
if (*erp)
return 0;
*erp = ext4_es_remove_extent(inode2, lblk2, count);
if (*erp)
return 0;
while (count) {
struct ext4_extent *ex1, *ex2, tmp_ex;
ext4_lblk_t e1_blk, e2_blk;
int e1_len, e2_len, len;
int split = 0;
path1 = ext4_ext_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path1)) {
*erp = PTR_ERR(path1);
break;
}
path2 = ext4_ext_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path2)) {
*erp = PTR_ERR(path2);
break;
}
ex1 = path1[path1->p_depth].p_ext;
ex2 = path2[path2->p_depth].p_ext;
/* Do we have somthing to swap ? */
if (unlikely(!ex2 || !ex1))
break;
e1_blk = le32_to_cpu(ex1->ee_block);
e2_blk = le32_to_cpu(ex2->ee_block);
e1_len = ext4_ext_get_actual_len(ex1);
e2_len = ext4_ext_get_actual_len(ex2);
/* Hole handling */
if (!in_range(lblk1, e1_blk, e1_len) ||
!in_range(lblk2, e2_blk, e2_len)) {
ext4_lblk_t next1, next2;
/* if hole after extent, then go to next extent */
next1 = ext4_ext_next_allocated_block(path1);
next2 = ext4_ext_next_allocated_block(path2);
/* If hole before extent, then shift to that extent */
if (e1_blk > lblk1)
next1 = e1_blk;
if (e2_blk > lblk2)
next2 = e1_blk;
/* Do we have something to swap */
if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
break;
/* Move to the rightest boundary */
len = next1 - lblk1;
if (len < next2 - lblk2)
len = next2 - lblk2;
if (len > count)
len = count;
lblk1 += len;
lblk2 += len;
count -= len;
goto repeat;
}
/* Prepare left boundary */
if (e1_blk < lblk1) {
split = 1;
*erp = ext4_force_split_extent_at(handle, inode1,
path1, lblk1, 0);
if (*erp)
break;
}
if (e2_blk < lblk2) {
split = 1;
*erp = ext4_force_split_extent_at(handle, inode2,
path2, lblk2, 0);
if (*erp)
break;
}
/* ext4_split_extent_at() may retult in leaf extent split,
* path must to be revalidated. */
if (split)
goto repeat;
/* Prepare right boundary */
len = count;
if (len > e1_blk + e1_len - lblk1)
len = e1_blk + e1_len - lblk1;
if (len > e2_blk + e2_len - lblk2)
len = e2_blk + e2_len - lblk2;
if (len != e1_len) {
split = 1;
*erp = ext4_force_split_extent_at(handle, inode1,
path1, lblk1 + len, 0);
if (*erp)
break;
}
if (len != e2_len) {
split = 1;
*erp = ext4_force_split_extent_at(handle, inode2,
path2, lblk2 + len, 0);
if (*erp)
break;
}
/* ext4_split_extent_at() may retult in leaf extent split,
* path must to be revalidated. */
if (split)
goto repeat;
BUG_ON(e2_len != e1_len);
*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
if (*erp)
break;
*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
if (*erp)
break;
/* Both extents are fully inside boundaries. Swap it now */
tmp_ex = *ex1;
ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
ex1->ee_len = cpu_to_le16(e2_len);
ex2->ee_len = cpu_to_le16(e1_len);
if (unwritten)
ext4_ext_mark_unwritten(ex2);
if (ext4_ext_is_unwritten(&tmp_ex))
ext4_ext_mark_unwritten(ex1);
ext4_ext_try_to_merge(handle, inode2, path2, ex2);
ext4_ext_try_to_merge(handle, inode1, path1, ex1);
*erp = ext4_ext_dirty(handle, inode2, path2 +
path2->p_depth);
if (*erp)
break;
*erp = ext4_ext_dirty(handle, inode1, path1 +
path1->p_depth);
/*
* Looks scarry ah..? second inode already points to new blocks,
* and it was successfully dirtied. But luckily error may happen
* only due to journal error, so full transaction will be
* aborted anyway.
*/
if (*erp)
break;
lblk1 += len;
lblk2 += len;
replaced_count += len;
count -= len;
repeat:
if (path1) {
ext4_ext_drop_refs(path1);
kfree(path1);
path1 = NULL;
}
if (path2) {
ext4_ext_drop_refs(path2);
kfree(path2);
path2 = NULL;
}
}
if (path1) {
ext4_ext_drop_refs(path1);
kfree(path1);
}
if (path2) {
ext4_ext_drop_refs(path2);
kfree(path2);
}
return replaced_count;
}

File diff suppressed because it is too large Load diff