linux-stable/fs/erofs/fscache.c
Jingbo Xu 61fef98945 erofs: unify anonymous inodes for blob
Currently there're two anonymous inodes (inode and anon_inode in struct
erofs_fscache) for each blob.  The former was introduced as the
address_space of page cache for bootstrap.

The latter was initially introduced as both the address_space of page
cache and also a sentinel in the shared domain.  Since now the management
of cookies in share domain has been decoupled with the anonymous inode,
there's no need to maintain an extra anonymous inode.  Let's unify these
two anonymous inodes.

Besides, in non-share-domain mode only bootstrap will allocate anonymous
inode.  To simplify the implementation, always allocate anonymous inode
for both bootstrap and data blobs.  Similarly release anonymous inodes
for data blobs when .put_super() is called, or we'll get "VFS: Busy
inodes after unmount." warning.

Also remove the redundant set_nlink() when initializing the anonymous
inode, since i_nlink has already been initialized to 1 when the inode
gets allocated.

Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Reviewed-by: Jia Zhu <zhujia.zj@bytedance.com>
Link: https://lore.kernel.org/r/20230209063913.46341-5-jefflexu@linux.alibaba.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2023-02-15 08:11:28 +08:00

611 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022, Alibaba Cloud
* Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/
#include <linux/fscache.h>
#include "internal.h"
static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;
struct erofs_fscache_request {
struct erofs_fscache_request *primary;
struct netfs_cache_resources cache_resources;
struct address_space *mapping; /* The mapping being accessed */
loff_t start; /* Start position */
size_t len; /* Length of the request */
size_t submitted; /* Length of submitted */
short error; /* 0 or error that occurred */
refcount_t ref;
};
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
loff_t start, size_t len)
{
struct erofs_fscache_request *req;
req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
req->mapping = mapping;
req->start = start;
req->len = len;
refcount_set(&req->ref, 1);
return req;
}
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
size_t len)
{
struct erofs_fscache_request *req;
/* use primary request for the first submission */
if (!primary->submitted) {
refcount_inc(&primary->ref);
return primary;
}
req = erofs_fscache_req_alloc(primary->mapping,
primary->start + primary->submitted, len);
if (!IS_ERR(req)) {
req->primary = primary;
refcount_inc(&primary->ref);
}
return req;
}
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
struct folio *folio;
bool failed = req->error;
pgoff_t start_page = req->start / PAGE_SIZE;
pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
XA_STATE(xas, &req->mapping->i_pages, start_page);
rcu_read_lock();
xas_for_each(&xas, folio, last_page) {
if (xas_retry(&xas, folio))
continue;
if (!failed)
folio_mark_uptodate(folio);
folio_unlock(folio);
}
rcu_read_unlock();
}
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
if (refcount_dec_and_test(&req->ref)) {
if (req->cache_resources.ops)
req->cache_resources.ops->end_operation(&req->cache_resources);
if (!req->primary)
erofs_fscache_req_complete(req);
else
erofs_fscache_req_put(req->primary);
kfree(req);
}
}
static void erofs_fscache_subreq_complete(void *priv,
ssize_t transferred_or_error, bool was_async)
{
struct erofs_fscache_request *req = priv;
if (IS_ERR_VALUE(transferred_or_error)) {
if (req->primary)
req->primary->error = transferred_or_error;
else
req->error = transferred_or_error;
}
erofs_fscache_req_put(req);
}
/*
* Read data from fscache (cookie, pstart, len), and fill the read data into
* page cache described by (req->mapping, lstart, len). @pstart describeis the
* start physical address in the cache file.
*/
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
enum netfs_io_source source;
struct super_block *sb = req->mapping->host->i_sb;
struct netfs_cache_resources *cres = &req->cache_resources;
struct iov_iter iter;
loff_t lstart = req->start + req->submitted;
size_t done = 0;
int ret;
DBG_BUGON(len > req->len - req->submitted);
ret = fscache_begin_read_operation(cres, cookie);
if (ret)
return ret;
while (done < len) {
loff_t sstart = pstart + done;
size_t slen = len - done;
unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
source = cres->ops->prepare_ondemand_read(cres,
sstart, &slen, LLONG_MAX, &flags, 0);
if (WARN_ON(slen == 0))
source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) {
erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
return -EIO;
}
refcount_inc(&req->ref);
iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
lstart + done, slen);
ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
erofs_fscache_subreq_complete, req);
if (ret == -EIOCBQUEUED)
ret = 0;
if (ret) {
erofs_err(sb, "failed to fscache_read (ret %d)", ret);
return ret;
}
done += slen;
}
DBG_BUGON(done != len);
return 0;
}
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
int ret;
struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
struct erofs_fscache_request *req;
req = erofs_fscache_req_alloc(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) {
folio_unlock(folio);
return PTR_ERR(req);
}
ret = erofs_fscache_read_folios_async(ctx->cookie, req,
folio_pos(folio), folio_size(folio));
if (ret)
req->error = ret;
erofs_fscache_req_put(req);
return ret;
}
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
struct address_space *mapping = primary->mapping;
struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
struct erofs_fscache_request *req;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
struct iov_iter iter;
loff_t pos = primary->start + primary->submitted;
size_t count;
int ret;
map.m_la = pos;
ret = erofs_map_blocks(inode, &map);
if (ret)
return ret;
if (map.m_flags & EROFS_MAP_META) {
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
erofs_blk_t blknr;
size_t offset, size;
void *src;
/* For tail packing layout, the offset may be non-zero. */
offset = erofs_blkoff(map.m_pa);
blknr = erofs_blknr(map.m_pa);
size = map.m_llen;
src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
if (IS_ERR(src))
return PTR_ERR(src);
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
if (copy_to_iter(src + offset, size, &iter) != size) {
erofs_put_metabuf(&buf);
return -EFAULT;
}
iov_iter_zero(PAGE_SIZE - size, &iter);
erofs_put_metabuf(&buf);
primary->submitted += PAGE_SIZE;
return 0;
}
count = primary->len - primary->submitted;
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
iov_iter_zero(count, &iter);
primary->submitted += count;
return 0;
}
count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
DBG_BUGON(!count || count % PAGE_SIZE);
mdev = (struct erofs_map_dev) {
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
ret = erofs_map_dev(sb, &mdev);
if (ret)
return ret;
req = erofs_fscache_req_chain(primary, count);
if (IS_ERR(req))
return PTR_ERR(req);
ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
req, mdev.m_pa + (pos - map.m_la), count);
erofs_fscache_req_put(req);
primary->submitted += count;
return ret;
}
static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
int ret;
do {
ret = erofs_fscache_data_read_slice(req);
if (ret)
req->error = ret;
} while (!ret && req->submitted < req->len);
return ret;
}
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
struct erofs_fscache_request *req;
int ret;
req = erofs_fscache_req_alloc(folio_mapping(folio),
folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) {
folio_unlock(folio);
return PTR_ERR(req);
}
ret = erofs_fscache_data_read(req);
erofs_fscache_req_put(req);
return ret;
}
static void erofs_fscache_readahead(struct readahead_control *rac)
{
struct erofs_fscache_request *req;
if (!readahead_count(rac))
return;
req = erofs_fscache_req_alloc(rac->mapping,
readahead_pos(rac), readahead_length(rac));
if (IS_ERR(req))
return;
/* The request completion will drop refs on the folios. */
while (readahead_folio(rac))
;
erofs_fscache_data_read(req);
erofs_fscache_req_put(req);
}
static const struct address_space_operations erofs_fscache_meta_aops = {
.read_folio = erofs_fscache_meta_read_folio,
};
const struct address_space_operations erofs_fscache_access_aops = {
.read_folio = erofs_fscache_read_folio,
.readahead = erofs_fscache_readahead,
};
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
mutex_lock(&erofs_domain_list_lock);
if (refcount_dec_and_test(&domain->ref)) {
list_del(&domain->list);
if (list_empty(&erofs_domain_list)) {
kern_unmount(erofs_pseudo_mnt);
erofs_pseudo_mnt = NULL;
}
fscache_relinquish_volume(domain->volume, NULL, false);
mutex_unlock(&erofs_domain_list_lock);
kfree(domain->domain_id);
kfree(domain);
return;
}
mutex_unlock(&erofs_domain_list_lock);
}
static int erofs_fscache_register_volume(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
char *domain_id = sbi->domain_id;
struct fscache_volume *volume;
char *name;
int ret = 0;
name = kasprintf(GFP_KERNEL, "erofs,%s",
domain_id ? domain_id : sbi->fsid);
if (!name)
return -ENOMEM;
volume = fscache_acquire_volume(name, NULL, NULL, 0);
if (IS_ERR_OR_NULL(volume)) {
erofs_err(sb, "failed to register volume for %s", name);
ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
volume = NULL;
}
sbi->volume = volume;
kfree(name);
return ret;
}
static int erofs_fscache_init_domain(struct super_block *sb)
{
int err;
struct erofs_domain *domain;
struct erofs_sb_info *sbi = EROFS_SB(sb);
domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
if (!domain)
return -ENOMEM;
domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
if (!domain->domain_id) {
kfree(domain);
return -ENOMEM;
}
err = erofs_fscache_register_volume(sb);
if (err)
goto out;
if (!erofs_pseudo_mnt) {
erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
if (IS_ERR(erofs_pseudo_mnt)) {
err = PTR_ERR(erofs_pseudo_mnt);
goto out;
}
}
domain->volume = sbi->volume;
refcount_set(&domain->ref, 1);
list_add(&domain->list, &erofs_domain_list);
sbi->domain = domain;
return 0;
out:
kfree(domain->domain_id);
kfree(domain);
return err;
}
static int erofs_fscache_register_domain(struct super_block *sb)
{
int err;
struct erofs_domain *domain;
struct erofs_sb_info *sbi = EROFS_SB(sb);
mutex_lock(&erofs_domain_list_lock);
list_for_each_entry(domain, &erofs_domain_list, list) {
if (!strcmp(domain->domain_id, sbi->domain_id)) {
sbi->domain = domain;
sbi->volume = domain->volume;
refcount_inc(&domain->ref);
mutex_unlock(&erofs_domain_list_lock);
return 0;
}
}
err = erofs_fscache_init_domain(sb);
mutex_unlock(&erofs_domain_list_lock);
return err;
}
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
char *name, unsigned int flags)
{
struct fscache_volume *volume = EROFS_SB(sb)->volume;
struct erofs_fscache *ctx;
struct fscache_cookie *cookie;
struct super_block *isb;
struct inode *inode;
int ret;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&ctx->node);
refcount_set(&ctx->ref, 1);
cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
name, strlen(name), NULL, 0, 0);
if (!cookie) {
erofs_err(sb, "failed to get cookie for %s", name);
ret = -EINVAL;
goto err;
}
fscache_use_cookie(cookie, false);
/*
* Allocate anonymous inode in global pseudo mount for shareable blobs,
* so that they are accessible among erofs fs instances.
*/
isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
inode = new_inode(isb);
if (!inode) {
erofs_err(sb, "failed to get anon inode for %s", name);
ret = -ENOMEM;
goto err_cookie;
}
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
inode->i_private = ctx;
ctx->cookie = cookie;
ctx->inode = inode;
return ctx;
err_cookie:
fscache_unuse_cookie(cookie, NULL, NULL);
fscache_relinquish_cookie(cookie, false);
err:
kfree(ctx);
return ERR_PTR(ret);
}
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
fscache_unuse_cookie(ctx->cookie, NULL, NULL);
fscache_relinquish_cookie(ctx->cookie, false);
iput(ctx->inode);
kfree(ctx->name);
kfree(ctx);
}
static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
char *name, unsigned int flags)
{
struct erofs_fscache *ctx;
struct erofs_domain *domain = EROFS_SB(sb)->domain;
ctx = erofs_fscache_acquire_cookie(sb, name, flags);
if (IS_ERR(ctx))
return ctx;
ctx->name = kstrdup(name, GFP_KERNEL);
if (!ctx->name) {
erofs_fscache_relinquish_cookie(ctx);
return ERR_PTR(-ENOMEM);
}
refcount_inc(&domain->ref);
ctx->domain = domain;
list_add(&ctx->node, &erofs_domain_cookies_list);
return ctx;
}
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
char *name, unsigned int flags)
{
struct erofs_fscache *ctx;
struct erofs_domain *domain = EROFS_SB(sb)->domain;
flags |= EROFS_REG_COOKIE_SHARE;
mutex_lock(&erofs_domain_cookies_lock);
list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
if (ctx->domain != domain || strcmp(ctx->name, name))
continue;
if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
refcount_inc(&ctx->ref);
} else {
erofs_err(sb, "%s already exists in domain %s", name,
domain->domain_id);
ctx = ERR_PTR(-EEXIST);
}
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
ctx = erofs_domain_init_cookie(sb, name, flags);
mutex_unlock(&erofs_domain_cookies_lock);
return ctx;
}
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name,
unsigned int flags)
{
if (EROFS_SB(sb)->domain_id)
return erofs_domain_register_cookie(sb, name, flags);
return erofs_fscache_acquire_cookie(sb, name, flags);
}
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
struct erofs_domain *domain = NULL;
if (!ctx)
return;
if (!ctx->domain)
return erofs_fscache_relinquish_cookie(ctx);
mutex_lock(&erofs_domain_cookies_lock);
if (refcount_dec_and_test(&ctx->ref)) {
domain = ctx->domain;
list_del(&ctx->node);
erofs_fscache_relinquish_cookie(ctx);
}
mutex_unlock(&erofs_domain_cookies_lock);
if (domain)
erofs_fscache_domain_put(domain);
}
int erofs_fscache_register_fs(struct super_block *sb)
{
int ret;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_fscache *fscache;
unsigned int flags = 0;
if (sbi->domain_id)
ret = erofs_fscache_register_domain(sb);
else
ret = erofs_fscache_register_volume(sb);
if (ret)
return ret;
/*
* When shared domain is enabled, using NEED_NOEXIST to guarantee
* the primary data blob (aka fsid) is unique in the shared domain.
*
* For non-shared-domain case, fscache_acquire_volume() invoked by
* erofs_fscache_register_volume() has already guaranteed
* the uniqueness of primary data blob.
*
* Acquired domain/volume will be relinquished in kill_sb() on error.
*/
if (sbi->domain_id)
flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
if (IS_ERR(fscache))
return PTR_ERR(fscache);
sbi->s_fscache = fscache;
return 0;
}
void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
erofs_fscache_unregister_cookie(sbi->s_fscache);
if (sbi->domain)
erofs_fscache_domain_put(sbi->domain);
else
fscache_relinquish_volume(sbi->volume, NULL, false);
sbi->s_fscache = NULL;
sbi->volume = NULL;
sbi->domain = NULL;
}