mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-11-01 08:58:07 +00:00
XFS Bug fixes for 6.12-rc4
* Fix integer overflow in xrep_bmap
* Fix stale dealloc punching for COW IO

Signed-off-by: Carlos Maiolino <cem@kernel.org>
-----BEGIN PGP SIGNATURE-----

iJUEABMJAB0WIQQMHYkcUKcy4GgPe2RGdaER5QtfpgUCZw5LIwAKCRBGdaER5Qtf
puRlAYDezbvs1dDSkKIGOt3inGdLptNAu4qniXBUkbYI9BzmtIVDueWP4Wo0dV3d
gu3xrWQBfjFXdmEuBlwLuAFrp07AN18BVMj+DWCiEShsPHSoSPcF/IrDiz4BHvGv
MKYq9CywFw==
=Gj9b
-----END PGP SIGNATURE-----

Merge tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:
- Fix integer overflow in xrep_bmap
- Fix stale dealloc punching for COW IO

* tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: punch delalloc extents from the COW fork for COW writes
  xfs: set IOMAP_F_SHARED for all COW fork allocations
  xfs: share more code in xfs_buffered_write_iomap_begin
  xfs: support the COW fork in xfs_bmap_punch_delalloc_range
  xfs: IOMAP_ZERO and IOMAP_UNSHARE already hold invalidate_lock
  xfs: take XFS_MMAPLOCK_EXCL xfs_file_write_zero_eof
  xfs: factor out a xfs_file_write_zero_eof helper
  iomap: move locking out of iomap_write_delalloc_release
  iomap: remove iomap_file_buffered_write_punch_delalloc
  iomap: factor out a iomap_last_written_block helper
  xfs: fix integer overflow in xrep_bmap
This commit is contained in:
commit
568570fdf2
9 changed files with 199 additions and 165 deletions
|
@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
|
|||
such `reservations
|
||||
<https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/>`_
|
||||
because writeback will not consume the reservation.
|
||||
The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
|
||||
The ``iomap_write_delalloc_release`` can be called from a
|
||||
``->iomap_end`` function to find all the clean areas of the folios
|
||||
caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
|
||||
The caller must hold the ``invalidate_lock`` exclusively.
|
||||
|
|
|
@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
|
|||
}
|
||||
|
||||
/*
|
||||
* When a short write occurs, the filesystem might need to use ->iomap_end
|
||||
* to remove space reservations created in ->iomap_begin.
|
||||
*
|
||||
* For filesystems that use delayed allocation, there can be dirty pages over
|
||||
* the delalloc extent outside the range of a short write but still within the
|
||||
* delalloc extent allocated for this iomap if the write raced with page
|
||||
* faults.
|
||||
*
|
||||
* Punch out all the delalloc blocks in the range given except for those that
|
||||
* have dirty data still pending in the page cache - those are going to be
|
||||
* written and so must still retain the delalloc backing for writeback.
|
||||
*
|
||||
* The punch() callback *must* only punch delalloc extents in the range passed
|
||||
* to it. It must skip over all other types of extents in the range and leave
|
||||
* them completely unchanged. It must do this punch atomically with respect to
|
||||
* other extent modifications.
|
||||
*
|
||||
* The punch() callback may be called with a folio locked to prevent writeback
|
||||
* extent allocation racing at the edge of the range we are currently punching.
|
||||
* The locked folio may or may not cover the range being punched, so it is not
|
||||
* safe for the punch() callback to lock folios itself.
|
||||
*
|
||||
* Lock order is:
|
||||
*
|
||||
* inode->i_rwsem (shared or exclusive)
|
||||
* inode->i_mapping->invalidate_lock (exclusive)
|
||||
* folio_lock()
|
||||
* ->punch
|
||||
* internal filesystem allocation lock
|
||||
*
|
||||
* As we are scanning the page cache for data, we don't need to reimplement the
|
||||
* wheel - mapping_seek_hole_data() does exactly what we need to identify the
|
||||
* start and end of data ranges correctly even for sub-folio block sizes. This
|
||||
|
@ -1177,7 +1203,7 @@ static void iomap_write_delalloc_scan(struct inode *inode,
|
|||
* require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
|
||||
* the code to subtle off-by-one bugs....
|
||||
*/
|
||||
static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
||||
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
||||
loff_t end_byte, unsigned flags, struct iomap *iomap,
|
||||
iomap_punch_t punch)
|
||||
{
|
||||
|
@ -1185,12 +1211,13 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
|||
loff_t scan_end_byte = min(i_size_read(inode), end_byte);
|
||||
|
||||
/*
|
||||
* Lock the mapping to avoid races with page faults re-instantiating
|
||||
* folios and dirtying them via ->page_mkwrite whilst we walk the
|
||||
* cache and perform delalloc extent removal. Failing to do this can
|
||||
* leave dirty pages with no space reservation in the cache.
|
||||
* The caller must hold invalidate_lock to avoid races with page faults
|
||||
* re-instantiating folios and dirtying them via ->page_mkwrite whilst
|
||||
* we walk the cache and perform delalloc extent removal. Failing to do
|
||||
* this can leave dirty pages with no space reservation in the cache.
|
||||
*/
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
lockdep_assert_held_write(&inode->i_mapping->invalidate_lock);
|
||||
|
||||
while (start_byte < scan_end_byte) {
|
||||
loff_t data_end;
|
||||
|
||||
|
@ -1207,7 +1234,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
|||
if (start_byte == -ENXIO || start_byte == scan_end_byte)
|
||||
break;
|
||||
if (WARN_ON_ONCE(start_byte < 0))
|
||||
goto out_unlock;
|
||||
return;
|
||||
WARN_ON_ONCE(start_byte < punch_start_byte);
|
||||
WARN_ON_ONCE(start_byte > scan_end_byte);
|
||||
|
||||
|
@ -1218,7 +1245,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
|||
data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
|
||||
scan_end_byte, SEEK_HOLE);
|
||||
if (WARN_ON_ONCE(data_end < 0))
|
||||
goto out_unlock;
|
||||
return;
|
||||
|
||||
/*
|
||||
* If we race with post-direct I/O invalidation of the page cache,
|
||||
|
@ -1240,74 +1267,8 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
|||
if (punch_start_byte < end_byte)
|
||||
punch(inode, punch_start_byte, end_byte - punch_start_byte,
|
||||
iomap);
|
||||
out_unlock:
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
* When a short write occurs, the filesystem may need to remove reserved space
|
||||
* that was allocated in ->iomap_begin from it's ->iomap_end method. For
|
||||
* filesystems that use delayed allocation, we need to punch out delalloc
|
||||
* extents from the range that are not dirty in the page cache. As the write can
|
||||
* race with page faults, there can be dirty pages over the delalloc extent
|
||||
* outside the range of a short write but still within the delalloc extent
|
||||
* allocated for this iomap.
|
||||
*
|
||||
* This function uses [start_byte, end_byte) intervals (i.e. open ended) to
|
||||
* simplify range iterations.
|
||||
*
|
||||
* The punch() callback *must* only punch delalloc extents in the range passed
|
||||
* to it. It must skip over all other types of extents in the range and leave
|
||||
* them completely unchanged. It must do this punch atomically with respect to
|
||||
* other extent modifications.
|
||||
*
|
||||
* The punch() callback may be called with a folio locked to prevent writeback
|
||||
* extent allocation racing at the edge of the range we are currently punching.
|
||||
* The locked folio may or may not cover the range being punched, so it is not
|
||||
* safe for the punch() callback to lock folios itself.
|
||||
*
|
||||
* Lock order is:
|
||||
*
|
||||
* inode->i_rwsem (shared or exclusive)
|
||||
* inode->i_mapping->invalidate_lock (exclusive)
|
||||
* folio_lock()
|
||||
* ->punch
|
||||
* internal filesystem allocation lock
|
||||
*/
|
||||
void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
|
||||
loff_t pos, loff_t length, ssize_t written, unsigned flags,
|
||||
struct iomap *iomap, iomap_punch_t punch)
|
||||
{
|
||||
loff_t start_byte;
|
||||
loff_t end_byte;
|
||||
unsigned int blocksize = i_blocksize(inode);
|
||||
|
||||
if (iomap->type != IOMAP_DELALLOC)
|
||||
return;
|
||||
|
||||
/* If we didn't reserve the blocks, we're not allowed to punch them. */
|
||||
if (!(iomap->flags & IOMAP_F_NEW))
|
||||
return;
|
||||
|
||||
/*
|
||||
* start_byte refers to the first unused block after a short write. If
|
||||
* nothing was written, round offset down to point at the first block in
|
||||
* the range.
|
||||
*/
|
||||
if (unlikely(!written))
|
||||
start_byte = round_down(pos, blocksize);
|
||||
else
|
||||
start_byte = round_up(pos + written, blocksize);
|
||||
end_byte = round_up(pos + length, blocksize);
|
||||
|
||||
/* Nothing to do if we've written the entire delalloc extent */
|
||||
if (start_byte >= end_byte)
|
||||
return;
|
||||
|
||||
iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
|
||||
punch);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
|
||||
EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
|
||||
|
||||
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
|
||||
{
|
||||
|
|
|
@ -801,7 +801,7 @@ xrep_bmap(
|
|||
{
|
||||
struct xrep_bmap *rb;
|
||||
char *descr;
|
||||
unsigned int max_bmbt_recs;
|
||||
xfs_extnum_t max_bmbt_recs;
|
||||
bool large_extcount;
|
||||
int error = 0;
|
||||
|
||||
|
|
|
@ -116,7 +116,7 @@ xfs_end_ioend(
|
|||
if (unlikely(error)) {
|
||||
if (ioend->io_flags & IOMAP_F_SHARED) {
|
||||
xfs_reflink_cancel_cow_range(ip, offset, size, true);
|
||||
xfs_bmap_punch_delalloc_range(ip, offset,
|
||||
xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
|
||||
offset + size);
|
||||
}
|
||||
goto done;
|
||||
|
@ -456,7 +456,7 @@ xfs_discard_folio(
|
|||
* byte of the next folio. Hence the end offset is only dependent on the
|
||||
* folio itself and not the start offset that is passed in.
|
||||
*/
|
||||
xfs_bmap_punch_delalloc_range(ip, pos,
|
||||
xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
|
||||
folio_pos(folio) + folio_size(folio));
|
||||
}
|
||||
|
||||
|
|
|
@ -442,11 +442,12 @@ xfs_getbmap(
|
|||
void
|
||||
xfs_bmap_punch_delalloc_range(
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
xfs_off_t start_byte,
|
||||
xfs_off_t end_byte)
|
||||
{
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
struct xfs_ifork *ifp = &ip->i_df;
|
||||
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
|
||||
xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
|
||||
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
|
||||
struct xfs_bmbt_irec got, del;
|
||||
|
@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
|
|||
continue;
|
||||
}
|
||||
|
||||
xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
|
||||
xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
|
||||
if (!xfs_iext_get_extent(ifp, &icur, &got))
|
||||
break;
|
||||
}
|
||||
|
||||
if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
|
||||
xfs_inode_clear_cowblocks_tag(ip);
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
|
@ -580,7 +584,7 @@ xfs_free_eofblocks(
|
|||
*/
|
||||
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
|
||||
if (ip->i_delayed_blks) {
|
||||
xfs_bmap_punch_delalloc_range(ip,
|
||||
xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
|
||||
round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
|
||||
LLONG_MAX);
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
|
|||
}
|
||||
#endif /* CONFIG_XFS_RT */
|
||||
|
||||
void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
|
||||
void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
|
||||
xfs_off_t start_byte, xfs_off_t end_byte);
|
||||
|
||||
struct kgetbmap {
|
||||
|
|
|
@ -347,10 +347,83 @@ xfs_file_splice_read(
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take care of zeroing post-EOF blocks when they might exist.
|
||||
*
|
||||
* Returns 0 if successful, a negative error for a failure, or 1 if this
|
||||
* function dropped the iolock and reacquired it exclusively and the caller
|
||||
* needs to restart the write sanity checks.
|
||||
*/
|
||||
static ssize_t
|
||||
xfs_file_write_zero_eof(
|
||||
struct kiocb *iocb,
|
||||
struct iov_iter *from,
|
||||
unsigned int *iolock,
|
||||
size_t count,
|
||||
bool *drained_dio)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
|
||||
loff_t isize;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* We need to serialise against EOF updates that occur in IO completions
|
||||
* here. We want to make sure that nobody is changing the size while
|
||||
* we do this check until we have placed an IO barrier (i.e. hold
|
||||
* XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
|
||||
* spinlock effectively forms a memory barrier once we have
|
||||
* XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
|
||||
* hence be able to correctly determine if we need to run zeroing.
|
||||
*/
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
isize = i_size_read(VFS_I(ip));
|
||||
if (iocb->ki_pos <= isize) {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
return -EAGAIN;
|
||||
|
||||
if (!*drained_dio) {
|
||||
/*
|
||||
* If zeroing is needed and we are currently holding the iolock
|
||||
* shared, we need to update it to exclusive which implies
|
||||
* having to redo all checks before.
|
||||
*/
|
||||
if (*iolock == XFS_IOLOCK_SHARED) {
|
||||
xfs_iunlock(ip, *iolock);
|
||||
*iolock = XFS_IOLOCK_EXCL;
|
||||
xfs_ilock(ip, *iolock);
|
||||
iov_iter_reexpand(from, count);
|
||||
}
|
||||
|
||||
/*
|
||||
* We now have an IO submission barrier in place, but AIO can do
|
||||
* EOF updates during IO completion and hence we now need to
|
||||
* wait for all of them to drain. Non-AIO DIO will have drained
|
||||
* before we are given the XFS_IOLOCK_EXCL, and so for most
|
||||
* cases this wait is a no-op.
|
||||
*/
|
||||
inode_dio_wait(VFS_I(ip));
|
||||
*drained_dio = true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
|
||||
|
||||
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
|
||||
error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
|
||||
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Common pre-write limit and setup checks.
|
||||
*
|
||||
* Called with the iolocked held either shared and exclusive according to
|
||||
* Called with the iolock held either shared or exclusive according to
|
||||
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
|
||||
* if called for a direct write beyond i_size.
|
||||
*/
|
||||
|
@ -360,13 +433,10 @@ xfs_file_write_checks(
|
|||
struct iov_iter *from,
|
||||
unsigned int *iolock)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
ssize_t error = 0;
|
||||
struct inode *inode = iocb->ki_filp->f_mapping->host;
|
||||
size_t count = iov_iter_count(from);
|
||||
bool drained_dio = false;
|
||||
loff_t isize;
|
||||
ssize_t error;
|
||||
|
||||
restart:
|
||||
error = generic_write_checks(iocb, from);
|
||||
|
@ -389,7 +459,7 @@ xfs_file_write_checks(
|
|||
* exclusively.
|
||||
*/
|
||||
if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
|
||||
xfs_iunlock(ip, *iolock);
|
||||
xfs_iunlock(XFS_I(inode), *iolock);
|
||||
*iolock = XFS_IOLOCK_EXCL;
|
||||
error = xfs_ilock_iocb(iocb, *iolock);
|
||||
if (error) {
|
||||
|
@ -400,64 +470,24 @@ xfs_file_write_checks(
|
|||
}
|
||||
|
||||
/*
|
||||
* If the offset is beyond the size of the file, we need to zero any
|
||||
* If the offset is beyond the size of the file, we need to zero all
|
||||
* blocks that fall between the existing EOF and the start of this
|
||||
* write. If zeroing is needed and we are currently holding the iolock
|
||||
* shared, we need to update it to exclusive which implies having to
|
||||
* redo all checks before.
|
||||
* write.
|
||||
*
|
||||
* We need to serialise against EOF updates that occur in IO completions
|
||||
* here. We want to make sure that nobody is changing the size while we
|
||||
* do this check until we have placed an IO barrier (i.e. hold the
|
||||
* XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
|
||||
* spinlock effectively forms a memory barrier once we have the
|
||||
* XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
|
||||
* hence be able to correctly determine if we need to run zeroing.
|
||||
*
|
||||
* We can do an unlocked check here safely as IO completion can only
|
||||
* extend EOF. Truncate is locked out at this point, so the EOF can
|
||||
* not move backwards, only forwards. Hence we only need to take the
|
||||
* slow path and spin locks when we are at or beyond the current EOF.
|
||||
* We can do an unlocked check for i_size here safely as I/O completion
|
||||
* can only extend EOF. Truncate is locked out at this point, so the
|
||||
* EOF can not move backwards, only forwards. Hence we only need to take
|
||||
* the slow path when we are at or beyond the current EOF.
|
||||
*/
|
||||
if (iocb->ki_pos <= i_size_read(inode))
|
||||
goto out;
|
||||
|
||||
spin_lock(&ip->i_flags_lock);
|
||||
isize = i_size_read(inode);
|
||||
if (iocb->ki_pos > isize) {
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
return -EAGAIN;
|
||||
|
||||
if (!drained_dio) {
|
||||
if (*iolock == XFS_IOLOCK_SHARED) {
|
||||
xfs_iunlock(ip, *iolock);
|
||||
*iolock = XFS_IOLOCK_EXCL;
|
||||
xfs_ilock(ip, *iolock);
|
||||
iov_iter_reexpand(from, count);
|
||||
}
|
||||
/*
|
||||
* We now have an IO submission barrier in place, but
|
||||
* AIO can do EOF updates during IO completion and hence
|
||||
* we now need to wait for all of them to drain. Non-AIO
|
||||
* DIO will have drained before we are given the
|
||||
* XFS_IOLOCK_EXCL, and so for most cases this wait is a
|
||||
* no-op.
|
||||
*/
|
||||
inode_dio_wait(inode);
|
||||
drained_dio = true;
|
||||
if (iocb->ki_pos > i_size_read(inode)) {
|
||||
error = xfs_file_write_zero_eof(iocb, from, iolock, count,
|
||||
&drained_dio);
|
||||
if (error == 1)
|
||||
goto restart;
|
||||
}
|
||||
|
||||
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
|
||||
error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
} else
|
||||
spin_unlock(&ip->i_flags_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
return kiocb_modified(iocb);
|
||||
}
|
||||
|
||||
|
|
|
@ -975,6 +975,7 @@ xfs_buffered_write_iomap_begin(
|
|||
int allocfork = XFS_DATA_FORK;
|
||||
int error = 0;
|
||||
unsigned int lockmode = XFS_ILOCK_EXCL;
|
||||
unsigned int iomap_flags = 0;
|
||||
u64 seq;
|
||||
|
||||
if (xfs_is_shutdown(mp))
|
||||
|
@ -1145,6 +1146,11 @@ xfs_buffered_write_iomap_begin(
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
|
||||
* them out if the write happens to fail.
|
||||
*/
|
||||
iomap_flags |= IOMAP_F_NEW;
|
||||
if (allocfork == XFS_COW_FORK) {
|
||||
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
|
||||
end_fsb - offset_fsb, prealloc_blocks, &cmap,
|
||||
|
@ -1162,19 +1168,11 @@ xfs_buffered_write_iomap_begin(
|
|||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
|
||||
* them out if the write happens to fail.
|
||||
*/
|
||||
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
|
||||
|
||||
found_imap:
|
||||
seq = xfs_iomap_inode_sequence(ip, 0);
|
||||
seq = xfs_iomap_inode_sequence(ip, iomap_flags);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
|
||||
|
||||
convert_delay:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
|
@ -1188,20 +1186,20 @@ xfs_buffered_write_iomap_begin(
|
|||
return 0;
|
||||
|
||||
found_cow:
|
||||
seq = xfs_iomap_inode_sequence(ip, 0);
|
||||
if (imap.br_startoff <= offset_fsb) {
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
|
||||
error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0,
|
||||
xfs_iomap_inode_sequence(ip, 0));
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
|
||||
IOMAP_F_SHARED, seq);
|
||||
} else {
|
||||
xfs_trim_extent(&cmap, offset_fsb,
|
||||
imap.br_startoff - offset_fsb);
|
||||
}
|
||||
|
||||
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
|
||||
iomap_flags |= IOMAP_F_SHARED;
|
||||
seq = xfs_iomap_inode_sequence(ip, iomap_flags);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, iomap_flags, seq);
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
|
@ -1215,7 +1213,10 @@ xfs_buffered_write_delalloc_punch(
|
|||
loff_t length,
|
||||
struct iomap *iomap)
|
||||
{
|
||||
xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length);
|
||||
xfs_bmap_punch_delalloc_range(XFS_I(inode),
|
||||
(iomap->flags & IOMAP_F_SHARED) ?
|
||||
XFS_COW_FORK : XFS_DATA_FORK,
|
||||
offset, offset + length);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -1227,8 +1228,30 @@ xfs_buffered_write_iomap_end(
|
|||
unsigned flags,
|
||||
struct iomap *iomap)
|
||||
{
|
||||
iomap_file_buffered_write_punch_delalloc(inode, offset, length, written,
|
||||
flags, iomap, &xfs_buffered_write_delalloc_punch);
|
||||
loff_t start_byte, end_byte;
|
||||
|
||||
/* If we didn't reserve the blocks, we're not allowed to punch them. */
|
||||
if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
|
||||
return 0;
|
||||
|
||||
/* Nothing to do if we've written the entire delalloc extent */
|
||||
start_byte = iomap_last_written_block(inode, offset, written);
|
||||
end_byte = round_up(offset + length, i_blocksize(inode));
|
||||
if (start_byte >= end_byte)
|
||||
return 0;
|
||||
|
||||
/* For zeroing operations the callers already hold invalidate_lock. */
|
||||
if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
|
||||
rwsem_assert_held_write(&inode->i_mapping->invalidate_lock);
|
||||
iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
|
||||
iomap, xfs_buffered_write_delalloc_punch);
|
||||
} else {
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
|
||||
iomap, xfs_buffered_write_delalloc_punch);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1435,6 +1458,8 @@ xfs_zero_range(
|
|||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
|
||||
|
||||
if (IS_DAX(inode))
|
||||
return dax_zero_range(inode, pos, len, did_zero,
|
||||
&xfs_dax_write_iomap_ops);
|
||||
|
|
|
@ -256,6 +256,20 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
|
|||
return &i->iomap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the file offset for the first unchanged block after a short write.
|
||||
*
|
||||
* If nothing was written, round @pos down to point at the first block in
|
||||
* the range, else round up to include the partially written block.
|
||||
*/
|
||||
static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos,
|
||||
ssize_t written)
|
||||
{
|
||||
if (unlikely(!written))
|
||||
return round_down(pos, i_blocksize(inode));
|
||||
return round_up(pos + written, i_blocksize(inode));
|
||||
}
|
||||
|
||||
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct iomap_ops *ops, void *private);
|
||||
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
|
||||
|
@ -276,9 +290,9 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
|
|||
|
||||
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
|
||||
struct iomap *iomap);
|
||||
void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos,
|
||||
loff_t length, ssize_t written, unsigned flag,
|
||||
struct iomap *iomap, iomap_punch_t punch);
|
||||
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
|
||||
loff_t end_byte, unsigned flags, struct iomap *iomap,
|
||||
iomap_punch_t punch);
|
||||
|
||||
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len, const struct iomap_ops *ops);
|
||||
|
|
Loading…
Reference in a new issue