ceph: fix potential use-after-free bug when trimming caps

commit aaf67de788 upstream.

When trimming the caps, just after 'session->s_cap_lock' is
released in ceph_iterate_session_caps(), the cap may be removed by
another thread; using the stale cap memory in the callbacks will then
trigger a use-after-free crash.

We need to check that the cap still exists just after 'ci->i_ceph_lock'
is acquired, and do nothing if it has already been removed.

Cc: stable@vger.kernel.org
Link: https://tracker.ceph.com/issues/43272
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Luís Henriques <lhenriques@suse.de>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Xiubo Li 2023-04-19 10:39:14 +08:00 committed by Greg Kroah-Hartman
parent 7e264f67b7
commit 448875a73e
5 changed files with 61 additions and 34 deletions

View File

@ -430,7 +430,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
* *
* Called with i_ceph_lock held. * Called with i_ceph_lock held.
*/ */
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{ {
struct ceph_cap *cap; struct ceph_cap *cap;
struct rb_node *n = ci->i_caps.rb_node; struct rb_node *n = ci->i_caps.rb_node;

View File

@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_file *s, void *p)
return 0; return 0;
} }
static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p) static int caps_show_cb(struct inode *inode, int mds, void *p)
{ {
struct ceph_inode_info *ci = ceph_inode(inode);
struct seq_file *s = p; struct seq_file *s = p;
struct ceph_cap *cap;
seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode), spin_lock(&ci->i_ceph_lock);
cap->session->s_mds, cap = __get_cap_for_mds(ci, mds);
ceph_cap_string(cap->issued), if (cap)
ceph_cap_string(cap->implemented)); seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
cap->session->s_mds,
ceph_cap_string(cap->issued),
ceph_cap_string(cap->implemented));
spin_unlock(&ci->i_ceph_lock);
return 0; return 0;
} }

View File

@ -1632,8 +1632,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
* Caller must hold session s_mutex. * Caller must hold session s_mutex.
*/ */
int ceph_iterate_session_caps(struct ceph_mds_session *session, int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, struct ceph_cap *, int (*cb)(struct inode *, int mds, void *),
void *), void *arg) void *arg)
{ {
struct list_head *p; struct list_head *p;
struct ceph_cap *cap; struct ceph_cap *cap;
@ -1645,6 +1645,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
spin_lock(&session->s_cap_lock); spin_lock(&session->s_cap_lock);
p = session->s_caps.next; p = session->s_caps.next;
while (p != &session->s_caps) { while (p != &session->s_caps) {
int mds;
cap = list_entry(p, struct ceph_cap, session_caps); cap = list_entry(p, struct ceph_cap, session_caps);
inode = igrab(&cap->ci->netfs.inode); inode = igrab(&cap->ci->netfs.inode);
if (!inode) { if (!inode) {
@ -1652,6 +1654,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
continue; continue;
} }
session->s_cap_iterator = cap; session->s_cap_iterator = cap;
mds = cap->mds;
spin_unlock(&session->s_cap_lock); spin_unlock(&session->s_cap_lock);
if (last_inode) { if (last_inode) {
@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
old_cap = NULL; old_cap = NULL;
} }
ret = cb(inode, cap, arg); ret = cb(inode, mds, arg);
last_inode = inode; last_inode = inode;
spin_lock(&session->s_cap_lock); spin_lock(&session->s_cap_lock);
@ -1696,20 +1699,25 @@ out:
return ret; return ret;
} }
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
void *arg)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
bool invalidate = false; bool invalidate = false;
int iputs; struct ceph_cap *cap;
int iputs = 0;
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->netfs.inode);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
iputs = ceph_purge_inode_cap(inode, cap, &invalidate); cap = __get_cap_for_mds(ci, mds);
if (cap) {
dout(" removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->netfs.inode);
iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
wake_up_all(&ci->i_cap_wq); if (cap)
wake_up_all(&ci->i_cap_wq);
if (invalidate) if (invalidate)
ceph_queue_invalidate(inode); ceph_queue_invalidate(inode);
while (iputs--) while (iputs--)
@ -1780,8 +1788,7 @@ enum {
* *
* caller must hold s_mutex. * caller must hold s_mutex.
*/ */
static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, static int wake_up_session_cb(struct inode *inode, int mds, void *arg)
void *arg)
{ {
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
unsigned long ev = (unsigned long)arg; unsigned long ev = (unsigned long)arg;
@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_requested_max_size = 0; ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
} else if (ev == RENEWCAPS) { } else if (ev == RENEWCAPS) {
if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) { struct ceph_cap *cap;
/* mds did not re-issue stale cap */
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
/* mds did not re-issue stale cap */
if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
cap->issued = cap->implemented = CEPH_CAP_PIN; cap->issued = cap->implemented = CEPH_CAP_PIN;
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
}
} else if (ev == FORCE_RO) { } else if (ev == FORCE_RO) {
} }
wake_up_all(&ci->i_cap_wq); wake_up_all(&ci->i_cap_wq);
@ -1959,16 +1968,22 @@ out:
* Yes, this is a bit sloppy. Our only real goal here is to respond to * Yes, this is a bit sloppy. Our only real goal here is to respond to
* memory pressure from the MDS, though, so it needn't be perfect. * memory pressure from the MDS, though, so it needn't be perfect.
*/ */
static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) static int trim_caps_cb(struct inode *inode, int mds, void *arg)
{ {
int *remaining = arg; int *remaining = arg;
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int used, wanted, oissued, mine; int used, wanted, oissued, mine;
struct ceph_cap *cap;
if (*remaining <= 0) if (*remaining <= 0)
return -1; return -1;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
return 0;
}
mine = cap->issued | cap->implemented; mine = cap->issued | cap->implemented;
used = __ceph_caps_used(ci); used = __ceph_caps_used(ci);
wanted = __ceph_caps_file_wanted(ci); wanted = __ceph_caps_file_wanted(ci);
@ -3911,26 +3926,22 @@ out_unlock:
/* /*
* Encode information about a cap for a reconnect with the MDS. * Encode information about a cap for a reconnect with the MDS.
*/ */
static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
void *arg)
{ {
union { union {
struct ceph_mds_cap_reconnect v2; struct ceph_mds_cap_reconnect v2;
struct ceph_mds_cap_reconnect_v1 v1; struct ceph_mds_cap_reconnect_v1 v1;
} rec; } rec;
struct ceph_inode_info *ci = cap->ci; struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_reconnect_state *recon_state = arg; struct ceph_reconnect_state *recon_state = arg;
struct ceph_pagelist *pagelist = recon_state->pagelist; struct ceph_pagelist *pagelist = recon_state->pagelist;
struct dentry *dentry; struct dentry *dentry;
struct ceph_cap *cap;
char *path; char *path;
int pathlen = 0, err; int pathlen = 0, err = 0;
u64 pathbase; u64 pathbase;
u64 snap_follows; u64 snap_follows;
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
ceph_cap_string(cap->issued));
dentry = d_find_primary(inode); dentry = d_find_primary(inode);
if (dentry) { if (dentry) {
/* set pathbase to parent dir when msg_version >= 2 */ /* set pathbase to parent dir when msg_version >= 2 */
@ -3947,6 +3958,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
} }
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
goto out_err;
}
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
inode, ceph_vinop(inode), cap, cap->cap_id,
ceph_cap_string(cap->issued));
cap->seq = 0; /* reset cap seq */ cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */ cap->issue_seq = 0; /* and issue_seq */
cap->mseq = 0; /* and migrate_seq */ cap->mseq = 0; /* and migrate_seq */

View File

@ -541,8 +541,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session, extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, int (*cb)(struct inode *, int mds, void *),
struct ceph_cap *, void *),
void *arg); void *arg);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);

View File

@ -1192,6 +1192,8 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session); struct ceph_mds_session *session);
void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session, void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
struct ceph_inode_info *ci); struct ceph_inode_info *ci);
extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci,
int mds);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
int mds); int mds);
extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps, extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,