mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-03 23:58:05 +00:00
New code for 5.10:
- Don't WARN_ON weird states that unprivileged users can create. - Don't invalidate page cache when direct writes want to fall back to buffered. - Fix some problems when readahead ios fail. - Fix a problem where inline data pages weren't getting flushed during an unshare operation. - Rework iomap to support arbitrarily many blocks per page in preparation to support THP for the page cache. - Fix a bug in the blocksize < pagesize buffered io path where we could fail to initialize the many-blocks-per-page uptodate bitmap correctly when the backing page is actually up to date. This could cause us to forget to write out dirty pages. - Split out the generic_write_sync at the end of the directio write path so that btrfs can drop the inode lock before sync'ing the file. - Call inode_dio_end before trying to sync the file after a O_DSYNC direct write (instead of afterwards) to match the behavior of the old directio code. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAl9yB04ACgkQ+H93GTRK tOuZxw/+IrBV3HV45PtqQX+HC2F4ebax26cIJrmCQD0neiu16I7H3COjIGN/YOGw bN04VirC3bG4BtzVHO/eRHQOCwCevIpP3LkhT6yOfOgkO4Z9Xn/O7E+7uYtgT5Qi dBqOFe/aoB6+uHEHaioWUTxF1MlsVqEK/yPWjbSIdQGKFVE03Azj4V5QHtBouF2+ pNEk7lbBnF0ua3biambeyDO3JTR9dsziIPH8QzQ4M/fMuNLfR2v0s6d4Ol/ndVrC Lp3RtThLcioAXh8xSPMO6RVUFfK97SLgNCRngApFbIJn85z9yq7eI7llnhO+XcHF FBJ+XottlwJFDt+0xNUaHmjkfUH9GoK8VeFOd3zHvp6xgZZpDkjG2JJk9ZC8Qnn5 xg4grGngWshNdxFBf8S/O73bAJ1SyRcD5ePYGyMfiij3beGJ0aulKGoYOdDfC/4c hHcUc8XpjHSobg5gklQijBif0WIQos1Z4OyDK9d2LqrJOO0NUypO/t2YIdgPFzkj rXLmWlKsUYSZyefI5Z8q0AVy7TQGxstS9poC3lkXlsszQ1E5BNup0/bhCGTgCW+5 az9m41KXxPEDLxieOvIAUhHSSP02IAGQ9Lvvat1GnGfEqShAEWS/IvmIxHDbvyNW lZ0NLqNKsItKBH0oIPsrP7fHz2ES1hUIMIaLbApUwKpUcAxrCLY= =ocIt -----END PGP SIGNATURE----- Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux Pull iomap updates from Darrick Wong: "There's not a lot of new stuff going on here -- a little bit of code refactoring to make iomap workable with btrfs' fsync 
locking model, cleanups in preparation for adding THP support for filesystems, and fixing a data corruption issue for blocksize < pagesize filesystems. Summary: - Don't WARN_ON weird states that unprivileged users can create. - Don't invalidate page cache when direct writes want to fall back to buffered. - Fix some problems when readahead ios fail. - Fix a problem where inline data pages weren't getting flushed during an unshare operation. - Rework iomap to support arbitrarily many blocks per page in preparation to support THP for the page cache. - Fix a bug in the blocksize < pagesize buffered io path where we could fail to initialize the many-blocks-per-page uptodate bitmap correctly when the backing page is actually up to date. This could cause us to forget to write out dirty pages. - Split out the generic_write_sync at the end of the directio write path so that btrfs can drop the inode lock before sync'ing the file. - Call inode_dio_end before trying to sync the file after a O_DSYNC direct write (instead of afterwards) to match the behavior of the old directio code" * tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: iomap: Call inode_dio_end() before generic_write_sync() iomap: Allow filesystem to call iomap_dio_complete without i_rwsem iomap: Set all uptodate bits for an Uptodate page iomap: Change calling convention for zeroing iomap: Convert iomap_write_end types iomap: Convert write_count to write_bytes_pending iomap: Convert read_count to read_bytes_pending iomap: Support arbitrarily many blocks per page iomap: Use bitmap ops to set uptodate bits iomap: Use kzalloc to allocate iomap_page fs: Introduce i_blocks_per_page iomap: Fix misplaced page flushing iomap: Use round_down/round_up macros in __iomap_write_begin iomap: Mark read blocks uptodate in write_begin iomap: Clear page error before beginning a write iomap: Fix direct I/O write consistency check iomap: fix WARN_ON_ONCE() from unprivileged users
This commit is contained in:
commit
37187df45a
8 changed files with 150 additions and 134 deletions
13
fs/dax.c
13
fs/dax.c
|
@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
|
||||||
struct iomap *iomap)
|
|
||||||
{
|
{
|
||||||
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
|
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
|
||||||
pgoff_t pgoff;
|
pgoff_t pgoff;
|
||||||
long rc, id;
|
long rc, id;
|
||||||
void *kaddr;
|
void *kaddr;
|
||||||
bool page_aligned = false;
|
bool page_aligned = false;
|
||||||
|
unsigned offset = offset_in_page(pos);
|
||||||
|
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
|
||||||
|
|
||||||
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
|
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
|
||||||
IS_ALIGNED(size, PAGE_SIZE))
|
(size == PAGE_SIZE))
|
||||||
page_aligned = true;
|
page_aligned = true;
|
||||||
|
|
||||||
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
|
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
|
||||||
|
@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||||
id = dax_read_lock();
|
id = dax_read_lock();
|
||||||
|
|
||||||
if (page_aligned)
|
if (page_aligned)
|
||||||
rc = dax_zero_page_range(iomap->dax_dev, pgoff,
|
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
|
||||||
size >> PAGE_SHIFT);
|
|
||||||
else
|
else
|
||||||
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
|
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
|
@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
||||||
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
dax_flush(iomap->dax_dev, kaddr + offset, size);
|
||||||
}
|
}
|
||||||
dax_read_unlock(id);
|
dax_read_unlock(id);
|
||||||
return 0;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static loff_t
|
static loff_t
|
||||||
|
|
|
@ -22,18 +22,25 @@
|
||||||
#include "../internal.h"
|
#include "../internal.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure allocated for each page when block size < PAGE_SIZE to track
|
* Structure allocated for each page or THP when block size < page size
|
||||||
* sub-page uptodate status and I/O completions.
|
* to track sub-page uptodate status and I/O completions.
|
||||||
*/
|
*/
|
||||||
struct iomap_page {
|
struct iomap_page {
|
||||||
atomic_t read_count;
|
atomic_t read_bytes_pending;
|
||||||
atomic_t write_count;
|
atomic_t write_bytes_pending;
|
||||||
spinlock_t uptodate_lock;
|
spinlock_t uptodate_lock;
|
||||||
DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
|
unsigned long uptodate[];
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct iomap_page *to_iomap_page(struct page *page)
|
static inline struct iomap_page *to_iomap_page(struct page *page)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* per-block data is stored in the head page. Callers should
|
||||||
|
* not be dealing with tail pages (and if they are, they can
|
||||||
|
* call thp_head() first).
|
||||||
|
*/
|
||||||
|
VM_BUG_ON_PGFLAGS(PageTail(page), page);
|
||||||
|
|
||||||
if (page_has_private(page))
|
if (page_has_private(page))
|
||||||
return (struct iomap_page *)page_private(page);
|
return (struct iomap_page *)page_private(page);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -45,20 +52,16 @@ static struct iomap_page *
|
||||||
iomap_page_create(struct inode *inode, struct page *page)
|
iomap_page_create(struct inode *inode, struct page *page)
|
||||||
{
|
{
|
||||||
struct iomap_page *iop = to_iomap_page(page);
|
struct iomap_page *iop = to_iomap_page(page);
|
||||||
|
unsigned int nr_blocks = i_blocks_per_page(inode, page);
|
||||||
|
|
||||||
if (iop || i_blocksize(inode) == PAGE_SIZE)
|
if (iop || nr_blocks <= 1)
|
||||||
return iop;
|
return iop;
|
||||||
|
|
||||||
iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
|
iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
|
||||||
atomic_set(&iop->read_count, 0);
|
GFP_NOFS | __GFP_NOFAIL);
|
||||||
atomic_set(&iop->write_count, 0);
|
|
||||||
spin_lock_init(&iop->uptodate_lock);
|
spin_lock_init(&iop->uptodate_lock);
|
||||||
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
|
if (PageUptodate(page))
|
||||||
|
bitmap_fill(iop->uptodate, nr_blocks);
|
||||||
/*
|
|
||||||
* migrate_page_move_mapping() assumes that pages with private data have
|
|
||||||
* their count elevated by 1.
|
|
||||||
*/
|
|
||||||
attach_page_private(page, iop);
|
attach_page_private(page, iop);
|
||||||
return iop;
|
return iop;
|
||||||
}
|
}
|
||||||
|
@ -67,11 +70,14 @@ static void
|
||||||
iomap_page_release(struct page *page)
|
iomap_page_release(struct page *page)
|
||||||
{
|
{
|
||||||
struct iomap_page *iop = detach_page_private(page);
|
struct iomap_page *iop = detach_page_private(page);
|
||||||
|
unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
|
||||||
|
|
||||||
if (!iop)
|
if (!iop)
|
||||||
return;
|
return;
|
||||||
WARN_ON_ONCE(atomic_read(&iop->read_count));
|
WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
|
||||||
WARN_ON_ONCE(atomic_read(&iop->write_count));
|
WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
|
||||||
|
WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
|
||||||
|
PageUptodate(page));
|
||||||
kfree(iop);
|
kfree(iop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,19 +148,11 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
|
||||||
struct inode *inode = page->mapping->host;
|
struct inode *inode = page->mapping->host;
|
||||||
unsigned first = off >> inode->i_blkbits;
|
unsigned first = off >> inode->i_blkbits;
|
||||||
unsigned last = (off + len - 1) >> inode->i_blkbits;
|
unsigned last = (off + len - 1) >> inode->i_blkbits;
|
||||||
bool uptodate = true;
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&iop->uptodate_lock, flags);
|
spin_lock_irqsave(&iop->uptodate_lock, flags);
|
||||||
for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
|
bitmap_set(iop->uptodate, first, last - first + 1);
|
||||||
if (i >= first && i <= last)
|
if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
|
||||||
set_bit(i, iop->uptodate);
|
|
||||||
else if (!test_bit(i, iop->uptodate))
|
|
||||||
uptodate = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (uptodate)
|
|
||||||
SetPageUptodate(page);
|
SetPageUptodate(page);
|
||||||
spin_unlock_irqrestore(&iop->uptodate_lock, flags);
|
spin_unlock_irqrestore(&iop->uptodate_lock, flags);
|
||||||
}
|
}
|
||||||
|
@ -171,13 +169,6 @@ iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
|
||||||
SetPageUptodate(page);
|
SetPageUptodate(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
iomap_read_finish(struct iomap_page *iop, struct page *page)
|
|
||||||
{
|
|
||||||
if (!iop || atomic_dec_and_test(&iop->read_count))
|
|
||||||
unlock_page(page);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
iomap_read_page_end_io(struct bio_vec *bvec, int error)
|
iomap_read_page_end_io(struct bio_vec *bvec, int error)
|
||||||
{
|
{
|
||||||
|
@ -191,7 +182,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error)
|
||||||
iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
|
iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
iomap_read_finish(iop, page);
|
if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
|
||||||
|
unlock_page(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -271,30 +263,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->cur_page_in_bio = true;
|
ctx->cur_page_in_bio = true;
|
||||||
|
if (iop)
|
||||||
|
atomic_add(plen, &iop->read_bytes_pending);
|
||||||
|
|
||||||
/*
|
/* Try to merge into a previous segment if we can */
|
||||||
* Try to merge into a previous segment if we can.
|
|
||||||
*/
|
|
||||||
sector = iomap_sector(iomap, pos);
|
sector = iomap_sector(iomap, pos);
|
||||||
if (ctx->bio && bio_end_sector(ctx->bio) == sector)
|
if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
|
||||||
is_contig = true;
|
if (__bio_try_merge_page(ctx->bio, page, plen, poff,
|
||||||
|
&same_page))
|
||||||
if (is_contig &&
|
|
||||||
__bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
|
|
||||||
if (!same_page && iop)
|
|
||||||
atomic_inc(&iop->read_count);
|
|
||||||
goto done;
|
goto done;
|
||||||
|
is_contig = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
if (!is_contig || bio_full(ctx->bio, plen)) {
|
||||||
* If we start a new segment we need to increase the read count, and we
|
|
||||||
* need to do so before submitting any previous full bio to make sure
|
|
||||||
* that we don't prematurely unlock the page.
|
|
||||||
*/
|
|
||||||
if (iop)
|
|
||||||
atomic_inc(&iop->read_count);
|
|
||||||
|
|
||||||
if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
|
|
||||||
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
|
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
|
||||||
gfp_t orig_gfp = gfp;
|
gfp_t orig_gfp = gfp;
|
||||||
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||||
|
@ -571,13 +552,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
|
||||||
{
|
{
|
||||||
struct iomap_page *iop = iomap_page_create(inode, page);
|
struct iomap_page *iop = iomap_page_create(inode, page);
|
||||||
loff_t block_size = i_blocksize(inode);
|
loff_t block_size = i_blocksize(inode);
|
||||||
loff_t block_start = pos & ~(block_size - 1);
|
loff_t block_start = round_down(pos, block_size);
|
||||||
loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
|
loff_t block_end = round_up(pos + len, block_size);
|
||||||
unsigned from = offset_in_page(pos), to = from + len, poff, plen;
|
unsigned from = offset_in_page(pos), to = from + len, poff, plen;
|
||||||
int status;
|
|
||||||
|
|
||||||
if (PageUptodate(page))
|
if (PageUptodate(page))
|
||||||
return 0;
|
return 0;
|
||||||
|
ClearPageError(page);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
iomap_adjust_read_range(inode, iop, &block_start,
|
iomap_adjust_read_range(inode, iop, &block_start,
|
||||||
|
@ -594,14 +575,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
|
||||||
if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
|
if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
zero_user_segments(page, poff, from, to, poff + plen);
|
zero_user_segments(page, poff, from, to, poff + plen);
|
||||||
iomap_set_range_uptodate(page, poff, plen);
|
} else {
|
||||||
continue;
|
int status = iomap_read_page_sync(block_start, page,
|
||||||
}
|
poff, plen, srcmap);
|
||||||
|
|
||||||
status = iomap_read_page_sync(block_start, page, poff, plen,
|
|
||||||
srcmap);
|
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
|
}
|
||||||
|
iomap_set_range_uptodate(page, poff, plen);
|
||||||
} while ((block_start += plen) < block_end);
|
} while ((block_start += plen) < block_end);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -685,9 +665,8 @@ iomap_set_page_dirty(struct page *page)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
|
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
|
||||||
|
|
||||||
static int
|
static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
|
||||||
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
|
size_t copied, struct page *page)
|
||||||
unsigned copied, struct page *page)
|
|
||||||
{
|
{
|
||||||
flush_dcache_page(page);
|
flush_dcache_page(page);
|
||||||
|
|
||||||
|
@ -709,15 +688,15 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
|
||||||
return copied;
|
return copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
|
||||||
iomap_write_end_inline(struct inode *inode, struct page *page,
|
struct iomap *iomap, loff_t pos, size_t copied)
|
||||||
struct iomap *iomap, loff_t pos, unsigned copied)
|
|
||||||
{
|
{
|
||||||
void *addr;
|
void *addr;
|
||||||
|
|
||||||
WARN_ON_ONCE(!PageUptodate(page));
|
WARN_ON_ONCE(!PageUptodate(page));
|
||||||
BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
|
BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
|
||||||
|
|
||||||
|
flush_dcache_page(page);
|
||||||
addr = kmap_atomic(page);
|
addr = kmap_atomic(page);
|
||||||
memcpy(iomap->inline_data + pos, addr + pos, copied);
|
memcpy(iomap->inline_data + pos, addr + pos, copied);
|
||||||
kunmap_atomic(addr);
|
kunmap_atomic(addr);
|
||||||
|
@ -726,13 +705,14 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
|
||||||
return copied;
|
return copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
/* Returns the number of bytes copied. May be 0. Cannot be an errno. */
|
||||||
iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
|
static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
|
||||||
struct page *page, struct iomap *iomap, struct iomap *srcmap)
|
size_t copied, struct page *page, struct iomap *iomap,
|
||||||
|
struct iomap *srcmap)
|
||||||
{
|
{
|
||||||
const struct iomap_page_ops *page_ops = iomap->page_ops;
|
const struct iomap_page_ops *page_ops = iomap->page_ops;
|
||||||
loff_t old_size = inode->i_size;
|
loff_t old_size = inode->i_size;
|
||||||
int ret;
|
size_t ret;
|
||||||
|
|
||||||
if (srcmap->type == IOMAP_INLINE) {
|
if (srcmap->type == IOMAP_INLINE) {
|
||||||
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
|
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
|
||||||
|
@ -811,13 +791,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||||
|
|
||||||
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
|
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
|
||||||
|
|
||||||
flush_dcache_page(page);
|
copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
|
||||||
|
|
||||||
status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
|
|
||||||
srcmap);
|
srcmap);
|
||||||
if (unlikely(status < 0))
|
|
||||||
break;
|
|
||||||
copied = status;
|
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
|
@ -891,11 +866,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||||
|
|
||||||
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
|
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
|
||||||
srcmap);
|
srcmap);
|
||||||
if (unlikely(status <= 0)) {
|
|
||||||
if (WARN_ON_ONCE(status == 0))
|
if (WARN_ON_ONCE(status == 0))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
|
@ -928,11 +900,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_file_unshare);
|
EXPORT_SYMBOL_GPL(iomap_file_unshare);
|
||||||
|
|
||||||
static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
|
static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
|
||||||
unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
|
struct iomap *iomap, struct iomap *srcmap)
|
||||||
{
|
{
|
||||||
struct page *page;
|
struct page *page;
|
||||||
int status;
|
int status;
|
||||||
|
unsigned offset = offset_in_page(pos);
|
||||||
|
unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
|
||||||
|
|
||||||
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
|
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
|
||||||
if (status)
|
if (status)
|
||||||
|
@ -944,38 +918,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
|
||||||
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
|
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
static loff_t
|
static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
|
||||||
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
|
loff_t length, void *data, struct iomap *iomap,
|
||||||
void *data, struct iomap *iomap, struct iomap *srcmap)
|
struct iomap *srcmap)
|
||||||
{
|
{
|
||||||
bool *did_zero = data;
|
bool *did_zero = data;
|
||||||
loff_t written = 0;
|
loff_t written = 0;
|
||||||
int status;
|
|
||||||
|
|
||||||
/* already zeroed? we're done. */
|
/* already zeroed? we're done. */
|
||||||
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
|
||||||
return count;
|
return length;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
unsigned offset, bytes;
|
s64 bytes;
|
||||||
|
|
||||||
offset = offset_in_page(pos);
|
|
||||||
bytes = min_t(loff_t, PAGE_SIZE - offset, count);
|
|
||||||
|
|
||||||
if (IS_DAX(inode))
|
if (IS_DAX(inode))
|
||||||
status = dax_iomap_zero(pos, offset, bytes, iomap);
|
bytes = dax_iomap_zero(pos, length, iomap);
|
||||||
else
|
else
|
||||||
status = iomap_zero(inode, pos, offset, bytes, iomap,
|
bytes = iomap_zero(inode, pos, length, iomap, srcmap);
|
||||||
srcmap);
|
if (bytes < 0)
|
||||||
if (status < 0)
|
return bytes;
|
||||||
return status;
|
|
||||||
|
|
||||||
pos += bytes;
|
pos += bytes;
|
||||||
count -= bytes;
|
length -= bytes;
|
||||||
written += bytes;
|
written += bytes;
|
||||||
if (did_zero)
|
if (did_zero)
|
||||||
*did_zero = true;
|
*did_zero = true;
|
||||||
} while (count > 0);
|
} while (length > 0);
|
||||||
|
|
||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
|
@ -1070,7 +1039,7 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
iomap_finish_page_writeback(struct inode *inode, struct page *page,
|
iomap_finish_page_writeback(struct inode *inode, struct page *page,
|
||||||
int error)
|
int error, unsigned int len)
|
||||||
{
|
{
|
||||||
struct iomap_page *iop = to_iomap_page(page);
|
struct iomap_page *iop = to_iomap_page(page);
|
||||||
|
|
||||||
|
@ -1079,10 +1048,10 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
|
||||||
mapping_set_error(inode->i_mapping, -EIO);
|
mapping_set_error(inode->i_mapping, -EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
|
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
|
||||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0);
|
WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
|
||||||
|
|
||||||
if (!iop || atomic_dec_and_test(&iop->write_count))
|
if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
|
||||||
end_page_writeback(page);
|
end_page_writeback(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1116,7 +1085,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
|
||||||
|
|
||||||
/* walk each page on bio, ending page IO on them */
|
/* walk each page on bio, ending page IO on them */
|
||||||
bio_for_each_segment_all(bv, bio, iter_all)
|
bio_for_each_segment_all(bv, bio, iter_all)
|
||||||
iomap_finish_page_writeback(inode, bv->bv_page, error);
|
iomap_finish_page_writeback(inode, bv->bv_page, error,
|
||||||
|
bv->bv_len);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
}
|
}
|
||||||
/* The ioend has been freed by bio_put() */
|
/* The ioend has been freed by bio_put() */
|
||||||
|
@ -1332,8 +1302,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
|
||||||
|
|
||||||
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
|
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
|
||||||
&same_page);
|
&same_page);
|
||||||
if (iop && !same_page)
|
if (iop)
|
||||||
atomic_inc(&iop->write_count);
|
atomic_add(len, &iop->write_bytes_pending);
|
||||||
|
|
||||||
if (!merged) {
|
if (!merged) {
|
||||||
if (bio_full(wpc->ioend->io_bio, len)) {
|
if (bio_full(wpc->ioend->io_bio, len)) {
|
||||||
|
@ -1375,8 +1345,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
|
||||||
int error = 0, count = 0, i;
|
int error = 0, count = 0, i;
|
||||||
LIST_HEAD(submit_list);
|
LIST_HEAD(submit_list);
|
||||||
|
|
||||||
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop);
|
WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
|
||||||
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0);
|
WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk through the page to find areas to write back. If we run off the
|
* Walk through the page to find areas to write back. If we run off the
|
||||||
|
|
|
@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
|
||||||
dio->submit.cookie = submit_bio(bio);
|
dio->submit.cookie = submit_bio(bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||||
{
|
{
|
||||||
const struct iomap_dio_ops *dops = dio->dops;
|
const struct iomap_dio_ops *dops = dio->dops;
|
||||||
struct kiocb *iocb = dio->iocb;
|
struct kiocb *iocb = dio->iocb;
|
||||||
|
@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||||
* ->end_io() when necessary, otherwise a racing buffer read would cache
|
* ->end_io() when necessary, otherwise a racing buffer read would cache
|
||||||
* zeros from unwritten extents.
|
* zeros from unwritten extents.
|
||||||
*/
|
*/
|
||||||
if (!dio->error &&
|
if (!dio->error && dio->size &&
|
||||||
(dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
|
(dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
|
||||||
int err;
|
int err;
|
||||||
err = invalidate_inode_pages2_range(inode->i_mapping,
|
err = invalidate_inode_pages2_range(inode->i_mapping,
|
||||||
|
@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||||
dio_warn_stale_pagecache(iocb->ki_filp);
|
dio_warn_stale_pagecache(iocb->ki_filp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inode_dio_end(file_inode(iocb->ki_filp));
|
||||||
/*
|
/*
|
||||||
* If this is a DSYNC write, make sure we push it to stable storage now
|
* If this is a DSYNC write, make sure we push it to stable storage now
|
||||||
* that we've written data.
|
* that we've written data.
|
||||||
|
@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
|
||||||
if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
|
if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
|
||||||
ret = generic_write_sync(iocb, ret);
|
ret = generic_write_sync(iocb, ret);
|
||||||
|
|
||||||
inode_dio_end(file_inode(iocb->ki_filp));
|
|
||||||
kfree(dio);
|
kfree(dio);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(iomap_dio_complete);
|
||||||
|
|
||||||
static void iomap_dio_complete_work(struct work_struct *work)
|
static void iomap_dio_complete_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
|
@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
|
||||||
return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
|
return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
|
||||||
case IOMAP_INLINE:
|
case IOMAP_INLINE:
|
||||||
return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
|
return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
|
||||||
|
case IOMAP_DELALLOC:
|
||||||
|
/*
|
||||||
|
* DIO is not serialised against mmap() access at all, and so
|
||||||
|
* if the page_mkwrite occurs between the writeback and the
|
||||||
|
* iomap_apply() call in the DIO path, then it will see the
|
||||||
|
* DELALLOC block that the page-mkwrite allocated.
|
||||||
|
*/
|
||||||
|
pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
|
||||||
|
dio->iocb->ki_filp, current->comm);
|
||||||
|
return -EIO;
|
||||||
default:
|
default:
|
||||||
WARN_ON_ONCE(1);
|
WARN_ON_ONCE(1);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
|
||||||
* Returns -ENOTBLK in case of a page invalidation failure for
|
* Returns -ENOTBLK in case of a page invalidation failure for
|
||||||
* writes. The caller needs to fall back to buffered I/O in this case.
|
* writes. The caller needs to fall back to buffered I/O in this case.
|
||||||
*/
|
*/
|
||||||
ssize_t
|
struct iomap_dio *
|
||||||
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||||
bool wait_for_completion)
|
bool wait_for_completion)
|
||||||
{
|
{
|
||||||
|
@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
struct iomap_dio *dio;
|
struct iomap_dio *dio;
|
||||||
|
|
||||||
if (!count)
|
if (!count)
|
||||||
return 0;
|
return NULL;
|
||||||
|
|
||||||
if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
|
if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
|
||||||
return -EIO;
|
return ERR_PTR(-EIO);
|
||||||
|
|
||||||
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
|
dio = kmalloc(sizeof(*dio), GFP_KERNEL);
|
||||||
if (!dio)
|
if (!dio)
|
||||||
return -ENOMEM;
|
return ERR_PTR(-ENOMEM);
|
||||||
|
|
||||||
dio->iocb = iocb;
|
dio->iocb = iocb;
|
||||||
atomic_set(&dio->ref, 1);
|
atomic_set(&dio->ref, 1);
|
||||||
|
@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
dio->wait_for_completion = wait_for_completion;
|
dio->wait_for_completion = wait_for_completion;
|
||||||
if (!atomic_dec_and_test(&dio->ref)) {
|
if (!atomic_dec_and_test(&dio->ref)) {
|
||||||
if (!wait_for_completion)
|
if (!wait_for_completion)
|
||||||
return -EIOCBQUEUED;
|
return ERR_PTR(-EIOCBQUEUED);
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||||
|
@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
__set_current_state(TASK_RUNNING);
|
__set_current_state(TASK_RUNNING);
|
||||||
}
|
}
|
||||||
|
|
||||||
return iomap_dio_complete(dio);
|
return dio;
|
||||||
|
|
||||||
out_free_dio:
|
out_free_dio:
|
||||||
kfree(dio);
|
kfree(dio);
|
||||||
return ret;
|
if (ret)
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(__iomap_dio_rw);
|
||||||
|
|
||||||
|
ssize_t
|
||||||
|
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||||
|
bool wait_for_completion)
|
||||||
|
{
|
||||||
|
struct iomap_dio *dio;
|
||||||
|
|
||||||
|
dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
|
||||||
|
if (IS_ERR_OR_NULL(dio))
|
||||||
|
return PTR_ERR_OR_ZERO(dio);
|
||||||
|
return iomap_dio_complete(dio);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(iomap_dio_rw);
|
EXPORT_SYMBOL_GPL(iomap_dio_rw);
|
||||||
|
|
|
@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
|
||||||
struct inode *inode = page->mapping->host;
|
struct inode *inode = page->mapping->host;
|
||||||
struct bio *bio = NULL;
|
struct bio *bio = NULL;
|
||||||
int block_offset;
|
int block_offset;
|
||||||
int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
|
int blocks_per_page = i_blocks_per_page(inode, page);
|
||||||
sector_t page_start; /* address of page in fs blocks */
|
sector_t page_start; /* address of page in fs blocks */
|
||||||
sector_t pblock;
|
sector_t pblock;
|
||||||
int xlen;
|
int xlen;
|
||||||
|
|
|
@ -544,7 +544,7 @@ xfs_discard_page(
|
||||||
page, ip->i_ino, offset);
|
page, ip->i_ino, offset);
|
||||||
|
|
||||||
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
|
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
|
||||||
PAGE_SIZE / i_blocksize(inode));
|
i_blocks_per_page(inode, page));
|
||||||
if (error && !XFS_FORCED_SHUTDOWN(mp))
|
if (error && !XFS_FORCED_SHUTDOWN(mp))
|
||||||
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
|
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
|
||||||
out_invalidate:
|
out_invalidate:
|
||||||
|
|
|
@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
|
||||||
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
|
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
|
||||||
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
|
||||||
pgoff_t index);
|
pgoff_t index);
|
||||||
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
|
s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
|
||||||
struct iomap *iomap);
|
|
||||||
static inline bool dax_mapping(struct address_space *mapping)
|
static inline bool dax_mapping(struct address_space *mapping)
|
||||||
{
|
{
|
||||||
return mapping->host && IS_DAX(mapping->host);
|
return mapping->host && IS_DAX(mapping->host);
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
struct address_space;
|
struct address_space;
|
||||||
struct fiemap_extent_info;
|
struct fiemap_extent_info;
|
||||||
struct inode;
|
struct inode;
|
||||||
|
struct iomap_dio;
|
||||||
struct iomap_writepage_ctx;
|
struct iomap_writepage_ctx;
|
||||||
struct iov_iter;
|
struct iov_iter;
|
||||||
struct kiocb;
|
struct kiocb;
|
||||||
|
@ -258,6 +259,10 @@ struct iomap_dio_ops {
|
||||||
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||||
bool wait_for_completion);
|
bool wait_for_completion);
|
||||||
|
struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
|
||||||
|
bool wait_for_completion);
|
||||||
|
ssize_t iomap_dio_complete(struct iomap_dio *dio);
|
||||||
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
|
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
|
||||||
|
|
||||||
#ifdef CONFIG_SWAP
|
#ifdef CONFIG_SWAP
|
||||||
|
|
|
@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page,
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* i_blocks_per_page - How many blocks fit in this page.
|
||||||
|
* @inode: The inode which contains the blocks.
|
||||||
|
* @page: The page (head page if the page is a THP).
|
||||||
|
*
|
||||||
|
* If the block size is larger than the size of this page, return zero.
|
||||||
|
*
|
||||||
|
* Context: The caller should hold a refcount on the page to prevent it
|
||||||
|
* from being split.
|
||||||
|
* Return: The number of filesystem blocks covered by this page.
|
||||||
|
*/
|
||||||
|
static inline
|
||||||
|
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
|
||||||
|
{
|
||||||
|
return thp_size(page) >> inode->i_blkbits;
|
||||||
|
}
|
||||||
#endif /* _LINUX_PAGEMAP_H */
|
#endif /* _LINUX_PAGEMAP_H */
|
||||||
|
|
Loading…
Reference in a new issue