linux-stable/include/linux/sunrpc/rpc_pipe_fs.h
minoura makoto b18cba09e3 SUNRPC: ensure the matching upcall is in-flight upon downcall
Commit 9130b8dbc6 ("SUNRPC: allow for upcalls for the same uid
but different gss service") introduced `auth` argument to
__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
since it (and auth->service) was not (yet) determined.

When multiple upcalls with the same uid and different service are
ongoing, it could happen that __gss_find_upcall(), which returns the
first match found in the pipe->in_downcall list, could not find the
correct gss_msg corresponding to the downcall we are looking for.
Moreover, it might return a msg which is not sent to rpc.gssd yet.

We could see mount.nfs process hung in D state with multiple mount.nfs
are executed in parallel.  The call trace below is of CentOS 7.9
kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
elrepo kernel-ml-6.0.7-1.el7.

PID: 71258  TASK: ffff91ebd4be0000  CPU: 36  COMMAND: "mount.nfs"
 #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
 #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
 #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
 #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
[sunrpc]
 #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
 #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
 #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
 #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
 #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
 #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]

The scenario is like this. Let's say there are two upcalls for
services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.

When rpc.gssd reads pipe to get the upcall msg corresponding to
service B from pipe->pipe and then writes the response, in
gss_pipe_downcall the msg corresponding to service A will be picked
because only uid is used to find the msg and it is before the one for
B in pipe->in_downcall.  And the process waiting for the msg
corresponding to service A will be woken up.

Actual scheduing of that process might be after rpc.gssd processes the
next msg.  In rpc_pipe_generic_upcall it clears msg->errno (for A).
The process is scheduled to see gss_msg->ctx == NULL and
gss_msg->msg.errno == 0, therefore it cannot break the loop in
gss_create_upcall and is never woken up after that.

This patch adds a simple check to ensure that a msg which is not
sent to rpc.gssd yet is not chosen as the matching upcall upon
receiving a downcall.

Signed-off-by: minoura makoto <minoura@valinux.co.jp>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Cc: Trond Myklebust <trondmy@hammerspace.com>
Fixes: 9130b8dbc6 ("SUNRPC: allow for upcalls for same uid but different gss service")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
2022-12-15 18:13:53 -05:00

138 lines
3.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H
#define _LINUX_SUNRPC_RPC_PIPE_FS_H
#include <linux/workqueue.h>
struct rpc_pipe_dir_head {
struct list_head pdh_entries;
struct dentry *pdh_dentry;
};
struct rpc_pipe_dir_object_ops;
struct rpc_pipe_dir_object {
struct list_head pdo_head;
const struct rpc_pipe_dir_object_ops *pdo_ops;
void *pdo_data;
};
struct rpc_pipe_dir_object_ops {
int (*create)(struct dentry *dir,
struct rpc_pipe_dir_object *pdo);
void (*destroy)(struct dentry *dir,
struct rpc_pipe_dir_object *pdo);
};
struct rpc_pipe_msg {
struct list_head list;
void *data;
size_t len;
size_t copied;
int errno;
};
struct rpc_pipe_ops {
ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t);
ssize_t (*downcall)(struct file *, const char __user *, size_t);
void (*release_pipe)(struct inode *);
int (*open_pipe)(struct inode *);
void (*destroy_msg)(struct rpc_pipe_msg *);
};
struct rpc_pipe {
struct list_head pipe;
struct list_head in_upcall;
struct list_head in_downcall;
int pipelen;
int nreaders;
int nwriters;
#define RPC_PIPE_WAIT_FOR_OPEN 1
int flags;
struct delayed_work queue_timeout;
const struct rpc_pipe_ops *ops;
spinlock_t lock;
struct dentry *dentry;
};
struct rpc_inode {
struct inode vfs_inode;
void *private;
struct rpc_pipe *pipe;
wait_queue_head_t waitq;
};
static inline struct rpc_inode *
RPC_I(struct inode *inode)
{
return container_of(inode, struct rpc_inode, vfs_inode);
}
enum {
SUNRPC_PIPEFS_NFS_PRIO,
SUNRPC_PIPEFS_RPC_PRIO,
};
extern int rpc_pipefs_notifier_register(struct notifier_block *);
extern void rpc_pipefs_notifier_unregister(struct notifier_block *);
enum {
RPC_PIPEFS_MOUNT,
RPC_PIPEFS_UMOUNT,
};
extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name);
extern int rpc_pipefs_init_net(struct net *net);
extern void rpc_pipefs_exit_net(struct net *net);
extern struct super_block *rpc_get_sb_net(const struct net *net);
extern void rpc_put_sb_net(const struct net *net);
extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
char __user *, size_t);
extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
/* returns true if the msg is in-flight, i.e., already eaten by the peer */
static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
return (msg->copied != 0 && list_empty(&msg->list));
}
struct rpc_clnt;
extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
extern int rpc_remove_client_dir(struct rpc_clnt *);
extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh);
extern void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
const struct rpc_pipe_dir_object_ops *pdo_ops,
void *pdo_data);
extern int rpc_add_pipe_dir_object(struct net *net,
struct rpc_pipe_dir_head *pdh,
struct rpc_pipe_dir_object *pdo);
extern void rpc_remove_pipe_dir_object(struct net *net,
struct rpc_pipe_dir_head *pdh,
struct rpc_pipe_dir_object *pdo);
extern struct rpc_pipe_dir_object *rpc_find_or_alloc_pipe_dir_object(
struct net *net,
struct rpc_pipe_dir_head *pdh,
int (*match)(struct rpc_pipe_dir_object *, void *),
struct rpc_pipe_dir_object *(*alloc)(void *),
void *data);
struct cache_detail;
extern struct dentry *rpc_create_cache_dir(struct dentry *,
const char *,
umode_t umode,
struct cache_detail *);
extern void rpc_remove_cache_dir(struct dentry *);
struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags);
void rpc_destroy_pipe_data(struct rpc_pipe *pipe);
extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *,
struct rpc_pipe *);
extern int rpc_unlink(struct dentry *);
extern int register_rpc_pipefs(void);
extern void unregister_rpc_pipefs(void);
extern bool gssd_running(struct net *net);
#endif