bcachefs: Change buffered write path to write to partial pages

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2019-07-29 13:38:38 -04:00 committed by Kent Overstreet
parent 7f5e31e1a4
commit d1542e0362

View file

@@ -519,7 +519,6 @@ struct bch_page_sector {
/* i_sectors: */
enum {
SECTOR_UNALLOCATED,
SECTOR_QUOTA_RESERVED,
SECTOR_DIRTY,
SECTOR_ALLOCATED,
} state:2;
@@ -597,31 +596,6 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
}
static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page)
{
struct bch_page_state *s = bch2_page_state(page);
struct disk_reservation disk_res = { 0 };
struct quota_res quota_res = { 0 };
unsigned i;
if (!s)
return;
for (i = 0; i < ARRAY_SIZE(s->s); i++) {
disk_res.sectors += s->s[i].replicas_reserved;
s->s[i].replicas_reserved = 0;
if (s->s[i].state == SECTOR_QUOTA_RESERVED) {
quota_res.sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res);
}
static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
{
/* XXX: this should not be open coded */
@@ -672,100 +646,134 @@ static int bch2_get_page_disk_reservation(struct bch_fs *c,
return 0;
}
static int bch2_get_page_quota_reservation(struct bch_fs *c,
struct bch2_page_reservation {
struct disk_reservation disk;
struct quota_res quota;
};
static void bch2_page_reservation_init(struct bch_fs *c,
struct bch_inode_info *inode,
struct page *page, bool check_enospc)
struct bch2_page_reservation *res)
{
memset(res, 0, sizeof(*res));
res->disk.nr_replicas = inode_nr_replicas(c, inode);
}
static void bch2_page_reservation_put(struct bch_fs *c,
struct bch_inode_info *inode,
struct bch2_page_reservation *res)
{
bch2_disk_reservation_put(c, &res->disk);
bch2_quota_reservation_put(c, inode, &res->quota);
}
static int bch2_page_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode, struct page *page,
struct bch2_page_reservation *res,
unsigned offset, unsigned len, bool check_enospc)
{
struct bch_page_state *s = bch2_page_state_create(page, 0);
struct quota_res quota_res = { 0 };
unsigned i, quota_res_sectors = 0;
unsigned i, disk_sectors = 0, quota_sectors = 0;
int ret;
if (!s)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(s->s); i++)
quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED;
for (i = offset / 512;
i < DIV_ROUND_UP(offset + len, 512);
i++) {
disk_sectors += sectors_to_reserve(&s->s[i],
res->disk.nr_replicas);
quota_sectors += s->s[i].state == SECTOR_UNALLOCATED;
}
if (!quota_res_sectors)
return 0;
if (disk_sectors) {
ret = bch2_disk_reservation_add(c, &res->disk,
disk_sectors,
!check_enospc
? BCH_DISK_RESERVATION_NOFAIL
: 0);
if (unlikely(ret))
return ret;
}
ret = bch2_quota_reservation_add(c, inode, &quota_res,
quota_res_sectors,
check_enospc);
if (unlikely(ret))
return ret;
if (quota_sectors) {
ret = bch2_quota_reservation_add(c, inode, &res->quota,
quota_sectors,
check_enospc);
if (unlikely(ret)) {
struct disk_reservation tmp = {
.sectors = disk_sectors
};
for (i = 0; i < ARRAY_SIZE(s->s); i++)
if (s->s[i].state == SECTOR_UNALLOCATED)
s->s[i].state = SECTOR_QUOTA_RESERVED;
bch2_disk_reservation_put(c, &tmp);
res->disk.sectors -= disk_sectors;
return ret;
}
}
return 0;
}
static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page, bool check_enospc)
{
return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?:
bch2_get_page_quota_reservation(c, inode, page, check_enospc);
}
static void bch2_clear_page_bits(struct page *page)
{
struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(page);
struct disk_reservation disk_res = { 0 };
int i, dirty_sectors = 0;
if (!s)
return;
for (i = 0; i < ARRAY_SIZE(s->s); i++) {
disk_res.sectors += s->s[i].replicas_reserved;
s->s[i].replicas_reserved = 0;
if (s->s[i].state == SECTOR_DIRTY) {
dirty_sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
bch2_disk_reservation_put(c, &disk_res);
if (dirty_sectors)
i_sectors_acct(c, inode, NULL, -dirty_sectors);
bch2_put_page_reservation(c, inode, page);
bch2_page_state_release(page);
}
static void __bch2_set_page_dirty(struct address_space *mapping, struct folio *folio)
static void bch2_set_page_dirty(struct bch_fs *c,
struct bch_inode_info *inode, struct page *page,
struct bch2_page_reservation *res,
unsigned offset, unsigned len)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(&folio->page);
struct quota_res quota_res = { 0 };
struct bch_page_state *s = bch2_page_state(page);
unsigned i, dirty_sectors = 0;
BUG_ON(!s);
for (i = offset / 512;
i < DIV_ROUND_UP(offset + len, 512);
i++) {
unsigned sectors = sectors_to_reserve(&s->s[i],
res->disk.nr_replicas);
for (i = 0; i < ARRAY_SIZE(s->s); i++) {
if (s->s[i].state == SECTOR_QUOTA_RESERVED)
quota_res.sectors++;
BUG_ON(sectors > res->disk.sectors);
s->s[i].replicas_reserved += sectors;
res->disk.sectors -= sectors;
if (s->s[i].state == SECTOR_UNALLOCATED ||
s->s[i].state == SECTOR_QUOTA_RESERVED) {
if (s->s[i].state == SECTOR_UNALLOCATED) {
s->s[i].state = SECTOR_DIRTY;
dirty_sectors++;
}
}
if (dirty_sectors)
i_sectors_acct(c, inode, &quota_res, dirty_sectors);
bch2_quota_reservation_put(c, inode, &quota_res);
}
i_sectors_acct(c, inode, &res->quota, dirty_sectors);
static void bch2_set_page_dirty(struct address_space *mapping, struct page *page)
{
struct folio *folio = page_folio(page);
__bch2_set_page_dirty(mapping, folio);
filemap_dirty_folio(mapping, folio);
if (!PageDirty(page))
filemap_dirty_folio(inode->v.i_mapping, page_folio(page));
}
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
@@ -788,8 +796,11 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation res;
int ret = VM_FAULT_LOCKED;
bch2_page_reservation_init(c, inode, &res);
sb_start_pagefault(inode->v.i_sb);
file_update_time(file);
@@ -809,18 +820,21 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
goto out;
}
if (bch2_get_page_reservation(c, inode, page, true)) {
if (bch2_page_reservation_get(c, inode, page, &res,
0, PAGE_SIZE, true)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
}
if (!PageDirty(page))
bch2_set_page_dirty(mapping, page);
bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
wait_for_stable_page(page);
out:
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
sb_end_pagefault(inode->v.i_sb);
bch2_page_reservation_put(c, inode, &res);
return ret;
}
@@ -1450,12 +1464,18 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation *res;
pgoff_t index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1);
struct page *page;
int ret = -ENOMEM;
BUG_ON(inode_unhashed(&inode->v));
res = kmalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return -ENOMEM;
bch2_page_reservation_init(c, inode, res);
*fsdata = res;
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
@@ -1486,7 +1506,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
if (ret)
goto err;
out:
ret = bch2_get_page_reservation(c, inode, page, true);
ret = bch2_page_reservation_get(c, inode, page, res,
offset, len, true);
if (ret) {
if (!PageUptodate(page)) {
/*
@@ -1509,6 +1530,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
*pagep = NULL;
err_unlock:
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
kfree(res);
*fsdata = NULL;
return ret;
}
@@ -1518,6 +1541,8 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation *res = fsdata;
unsigned offset = pos & (PAGE_SIZE - 1);
lockdep_assert_held(&inode->v.i_rwsem);
@@ -1540,18 +1565,19 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
if (copied) {
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page))
bch2_set_page_dirty(mapping, page);
bch2_set_page_dirty(c, inode, page, res, offset, copied);
inode->ei_last_dirtied = (unsigned long) current;
} else {
bch2_put_page_reservation(c, inode, page);
}
unlock_page(page);
put_page(page);
bch2_pagecache_add_put(&inode->ei_pagecache_lock);
bch2_page_reservation_put(c, inode, res);
kfree(res);
return copied;
}
@@ -1564,15 +1590,19 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct page *pages[WRITE_BATCH_PAGES];
struct bch2_page_reservation res;
unsigned long index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
unsigned i, copied = 0, nr_pages_copied = 0;
unsigned i, reserved = 0, set_dirty = 0;
unsigned copied = 0, nr_pages_copied = 0;
int ret = 0;
BUG_ON(!len);
BUG_ON(nr_pages > ARRAY_SIZE(pages));
bch2_page_reservation_init(c, inode, &res);
for (i = 0; i < nr_pages; i++) {
pages[i] = grab_cache_page_write_begin(mapping, index + i);
if (!pages[i]) {
@@ -1599,19 +1629,25 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
}
}
for (i = 0; i < nr_pages; i++) {
ret = bch2_get_page_reservation(c, inode, pages[i], true);
while (reserved < len) {
struct page *page = pages[(offset + reserved) >> PAGE_SHIFT];
unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
unsigned pg_len = min_t(unsigned, len - reserved,
PAGE_SIZE - pg_offset);
retry_reservation:
ret = bch2_page_reservation_get(c, inode, page, &res,
pg_offset, pg_len, true);
if (ret && !PageUptodate(pages[i])) {
ret = bch2_read_single_page(pages[i], mapping);
if (ret)
goto out;
ret = bch2_get_page_reservation(c, inode, pages[i], true);
if (ret && !PageUptodate(page)) {
ret = bch2_read_single_page(page, mapping);
if (!ret)
goto retry_reservation;
}
if (ret)
goto out;
reserved += pg_len;
}
if (mapping_writably_mapped(mapping))
@@ -1621,16 +1657,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
while (copied < len) {
struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
unsigned pg_bytes = min_t(unsigned, len - copied,
PAGE_SIZE - pg_offset);
unsigned pg_len = min_t(unsigned, len - copied,
PAGE_SIZE - pg_offset);
unsigned pg_copied = copy_page_from_iter_atomic(page,
pg_offset, pg_bytes, iter);
pg_offset, pg_len, iter);
if (!pg_copied)
break;
flush_dcache_page(page);
copied += pg_copied;
if (pg_copied != pg_bytes)
break;
}
if (!copied)
@@ -1653,22 +1689,29 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
copied -= (offset + copied) & (PAGE_SIZE - 1);
}
}
while (set_dirty < copied) {
struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
unsigned pg_len = min_t(unsigned, copied - set_dirty,
PAGE_SIZE - pg_offset);
if (!PageUptodate(page))
SetPageUptodate(page);
bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
unlock_page(page);
put_page(page);
set_dirty += pg_len;
}
out:
for (i = 0; i < nr_pages_copied; i++) {
if (!PageUptodate(pages[i]))
SetPageUptodate(pages[i]);
if (!PageDirty(pages[i]))
bch2_set_page_dirty(mapping, pages[i]);
for (i = nr_pages_copied; i < nr_pages; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
}
for (i = nr_pages_copied; i < nr_pages; i++) {
if (!PageDirty(pages[i]))
bch2_put_page_reservation(c, inode, pages[i]);
unlock_page(pages[i]);
put_page(pages[i]);
}
bch2_page_reservation_put(c, inode, &res);
return copied ?: ret;
}
@@ -2322,7 +2365,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this wrong. ick.
*/
ret = bch2_get_page_reservation(c, inode, page, false);
ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret);
if (index == start >> PAGE_SHIFT &&
@ -2333,8 +2376,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
else if (index == end >> PAGE_SHIFT)
zero_user_segment(page, 0, end_offset);
if (!PageDirty(page))
bch2_set_page_dirty(mapping, page);
filemap_dirty_folio(mapping, page_folio(page));
unlock:
unlock_page(page);
put_page(page);