mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-29 13:53:33 +00:00
ceph: fix possible deadlock when holding Fwb to get inline_data
[ Upstream commit 825978fd6a
]
1, mount with wsync.
2, create a file with O_RDWR, and the request was sent to mds.0:
ceph_atomic_open()-->
ceph_mdsc_do_request(openc)
finish_open(file, dentry, ceph_open)-->
ceph_open()-->
ceph_init_file()-->
ceph_init_file_info()-->
ceph_uninline_data()-->
{
...
if (inline_version == 1 || /* initial version, no data */
inline_version == CEPH_INLINE_NONE)
goto out_unlock;
...
}
The inline_version will be 1, which is the initial version for the
new create file. And here the ci->i_inline_version will keep with 1,
it's buggy.
3, buffer write to the file immediately:
ceph_write_iter()-->
ceph_get_caps(file, need=Fw, want=Fb, ...);
generic_perform_write()-->
a_ops->write_begin()-->
ceph_write_begin()-->
netfs_write_begin()-->
netfs_begin_read()-->
netfs_rreq_submit_slice()-->
netfs_read_from_server()-->
rreq->netfs_ops->issue_read()-->
ceph_netfs_issue_read()-->
{
...
if (ci->i_inline_version != CEPH_INLINE_NONE &&
ceph_netfs_issue_op_inline(subreq))
return;
...
}
ceph_put_cap_refs(ci, Fwb);
The ceph_netfs_issue_op_inline() will send a getattr(Fsr) request to
mds.1.
4, then the mds.1 will request the rd lock for CInode::filelock from
the auth mds.0, the mds.0 will do the CInode::filelock state transation
from excl --> sync, but it need to revoke the Fxwb caps back from the
clients.
While the kernel client has aleady held the Fwb caps and waiting for
the getattr(Fsr).
It's deadlock!
URL: https://tracker.ceph.com/issues/55377
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
62592a3314
commit
292b7a7275
1 changed files with 23 additions and 18 deletions
|
@ -1644,7 +1644,7 @@ int ceph_uninline_data(struct file *file)
|
|||
struct inode *inode = file_inode(file);
|
||||
struct ceph_inode_info *ci = ceph_inode(inode);
|
||||
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
||||
struct ceph_osd_request *req;
|
||||
struct ceph_osd_request *req = NULL;
|
||||
struct ceph_cap_flush *prealloc_cf;
|
||||
struct folio *folio = NULL;
|
||||
u64 inline_version = CEPH_INLINE_NONE;
|
||||
|
@ -1652,18 +1652,6 @@ int ceph_uninline_data(struct file *file)
|
|||
int err = 0;
|
||||
u64 len;
|
||||
|
||||
prealloc_cf = ceph_alloc_cap_flush();
|
||||
if (!prealloc_cf)
|
||||
return -ENOMEM;
|
||||
|
||||
folio = read_mapping_folio(inode->i_mapping, 0, file);
|
||||
if (IS_ERR(folio)) {
|
||||
err = PTR_ERR(folio);
|
||||
goto out;
|
||||
}
|
||||
|
||||
folio_lock(folio);
|
||||
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
inline_version = ci->i_inline_version;
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
@ -1671,9 +1659,23 @@ int ceph_uninline_data(struct file *file)
|
|||
dout("uninline_data %p %llx.%llx inline_version %llu\n",
|
||||
inode, ceph_vinop(inode), inline_version);
|
||||
|
||||
if (inline_version == 1 || /* initial version, no data */
|
||||
inline_version == CEPH_INLINE_NONE)
|
||||
goto out_unlock;
|
||||
if (inline_version == CEPH_INLINE_NONE)
|
||||
return 0;
|
||||
|
||||
prealloc_cf = ceph_alloc_cap_flush();
|
||||
if (!prealloc_cf)
|
||||
return -ENOMEM;
|
||||
|
||||
if (inline_version == 1) /* initial version, no data */
|
||||
goto out_uninline;
|
||||
|
||||
folio = read_mapping_folio(inode->i_mapping, 0, file);
|
||||
if (IS_ERR(folio)) {
|
||||
err = PTR_ERR(folio);
|
||||
goto out;
|
||||
}
|
||||
|
||||
folio_lock(folio);
|
||||
|
||||
len = i_size_read(inode);
|
||||
if (len > folio_size(folio))
|
||||
|
@ -1739,6 +1741,7 @@ int ceph_uninline_data(struct file *file)
|
|||
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
|
||||
req->r_end_latency, len, err);
|
||||
|
||||
out_uninline:
|
||||
if (!err) {
|
||||
int dirty;
|
||||
|
||||
|
@ -1757,8 +1760,10 @@ int ceph_uninline_data(struct file *file)
|
|||
if (err == -ECANCELED)
|
||||
err = 0;
|
||||
out_unlock:
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
if (folio) {
|
||||
folio_unlock(folio);
|
||||
folio_put(folio);
|
||||
}
|
||||
out:
|
||||
ceph_free_cap_flush(prealloc_cf);
|
||||
dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
|
||||
|
|
Loading…
Reference in a new issue