linux-stable/net/phonet/socket.c

772 lines
16 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* File: socket.c
*
* Phonet sockets
*
* Copyright (C) 2008 Nokia Corporation.
*
* Authors: Sakari Ailus <sakari.ailus@nokia.com>
* Rémi Denis-Courmont
*/
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/sched/signal.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <linux/phonet.h>
#include <linux/export.h>
#include <net/phonet/phonet.h>
#include <net/phonet/pep.h>
#include <net/phonet/pn_dev.h>
static int pn_socket_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (sk) {
sock->sk = NULL;
sk->sk_prot->close(sk, 0);
}
return 0;
}
#define PN_HASHSIZE 16
#define PN_HASHMASK (PN_HASHSIZE-1)
static struct {
struct hlist_head hlist[PN_HASHSIZE];
struct mutex lock;
} pnsocks;
void __init pn_sock_init(void)
{
unsigned int i;
for (i = 0; i < PN_HASHSIZE; i++)
INIT_HLIST_HEAD(pnsocks.hlist + i);
mutex_init(&pnsocks.lock);
}
static struct hlist_head *pn_hash_list(u16 obj)
{
return pnsocks.hlist + (obj & PN_HASHMASK);
}
/*
* Find address based on socket address, match only certain fields.
* Also grab sock if it was found. Remember to sock_put it later.
*/
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
{
struct sock *sknode;
struct sock *rval = NULL;
u16 obj = pn_sockaddr_get_object(spn);
u8 res = spn->spn_resource;
struct hlist_head *hlist = pn_hash_list(obj);
rcu_read_lock();
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 01:06:00 +00:00
sk_for_each_rcu(sknode, hlist) {
struct pn_sock *pn = pn_sk(sknode);
BUG_ON(!pn->sobject); /* unbound socket */
if (!net_eq(sock_net(sknode), net))
continue;
if (pn_port(obj)) {
/* Look up socket by port */
if (pn_port(pn->sobject) != pn_port(obj))
continue;
} else {
/* If port is zero, look up by resource */
if (pn->resource != res)
continue;
}
if (pn_addr(pn->sobject) &&
pn_addr(pn->sobject) != pn_addr(obj))
continue;
rval = sknode;
sock_hold(sknode);
break;
}
rcu_read_unlock();
return rval;
}
/* Deliver a broadcast packet (only in bottom-half) */
void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
{
struct hlist_head *hlist = pnsocks.hlist;
unsigned int h;
rcu_read_lock();
for (h = 0; h < PN_HASHSIZE; h++) {
struct sock *sknode;
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 01:06:00 +00:00
sk_for_each(sknode, hlist) {
struct sk_buff *clone;
if (!net_eq(sock_net(sknode), net))
continue;
if (!sock_flag(sknode, SOCK_BROADCAST))
continue;
clone = skb_clone(skb, GFP_ATOMIC);
if (clone) {
sock_hold(sknode);
sk_receive_skb(sknode, clone, 0);
}
}
hlist++;
}
rcu_read_unlock();
}
int pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
mutex_lock(&pnsocks.lock);
sk_add_node_rcu(sk, hlist);
mutex_unlock(&pnsocks.lock);
return 0;
}
EXPORT_SYMBOL(pn_sock_hash);
void pn_sock_unhash(struct sock *sk)
{
mutex_lock(&pnsocks.lock);
sk_del_node_init_rcu(sk);
mutex_unlock(&pnsocks.lock);
pn_sock_unbind_all_res(sk);
synchronize_rcu();
}
EXPORT_SYMBOL(pn_sock_unhash);
static DEFINE_MUTEX(port_mutex);
static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
int err;
u16 handle;
u8 saddr;
if (sk->sk_prot->bind)
return sk->sk_prot->bind(sk, addr, len);
if (len < sizeof(struct sockaddr_pn))
return -EINVAL;
if (spn->spn_family != AF_PHONET)
return -EAFNOSUPPORT;
handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr);
saddr = pn_addr(handle);
if (saddr && phonet_address_lookup(sock_net(sk), saddr))
return -EADDRNOTAVAIL;
lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) {
err = -EINVAL; /* attempt to rebind */
goto out;
}
WARN_ON(sk_hashed(sk));
mutex_lock(&port_mutex);
err = sk->sk_prot->get_port(sk, pn_port(handle));
if (err)
goto out_port;
/* get_port() sets the port, bind() sets the address if applicable */
pn->sobject = pn_object(saddr, pn_port(pn->sobject));
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
err = sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
release_sock(sk);
return err;
}
static int pn_socket_autobind(struct socket *sock)
{
struct sockaddr_pn sa;
int err;
memset(&sa, 0, sizeof(sa));
sa.spn_family = AF_PHONET;
err = pn_socket_bind(sock, (struct sockaddr *)&sa,
sizeof(struct sockaddr_pn));
if (err != -EINVAL)
return err;
BUG_ON(!pn_port(pn_sk(sock->sk)->sobject));
return 0; /* socket was already bound */
}
static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
int len, int flags)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
struct task_struct *tsk = current;
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
int err;
if (pn_socket_autobind(sock))
return -ENOBUFS;
if (len < sizeof(struct sockaddr_pn))
return -EINVAL;
if (spn->spn_family != AF_PHONET)
return -EAFNOSUPPORT;
lock_sock(sk);
switch (sock->state) {
case SS_UNCONNECTED:
if (sk->sk_state != TCP_CLOSE) {
err = -EISCONN;
goto out;
}
break;
case SS_CONNECTING:
err = -EALREADY;
goto out;
default:
err = -EISCONN;
goto out;
}
pn->dobject = pn_sockaddr_get_object(spn);
pn->resource = pn_sockaddr_get_resource(spn);
sock->state = SS_CONNECTING;
err = sk->sk_prot->connect(sk, addr, len);
if (err) {
sock->state = SS_UNCONNECTED;
pn->dobject = 0;
goto out;
}
while (sk->sk_state == TCP_SYN_SENT) {
DEFINE_WAIT(wait);
if (!timeo) {
err = -EINPROGRESS;
goto out;
}
if (signal_pending(tsk)) {
err = sock_intr_errno(timeo);
goto out;
}
prepare_to_wait_exclusive(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
finish_wait(sk_sleep(sk), &wait);
}
if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
err = 0;
else if (sk->sk_state == TCP_CLOSE_WAIT)
err = -ECONNRESET;
else
err = -ECONNREFUSED;
sock->state = err ? SS_UNCONNECTED : SS_CONNECTED;
out:
release_sock(sk);
return err;
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
struct proto_accept_arg *arg)
{
struct sock *sk = sock->sk;
struct sock *newsk;
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
newsk = sk->sk_prot->accept(sk, arg);
if (!newsk)
return arg->err;
lock_sock(newsk);
sock_graft(newsk, newsock);
newsock->state = SS_CONNECTED;
release_sock(newsk);
return 0;
}
static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-12 19:00:20 +00:00
int peer)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
memset(addr, 0, sizeof(struct sockaddr_pn));
addr->sa_family = AF_PHONET;
if (!peer) /* Race with bind() here is userland's problem. */
pn_sockaddr_set_object((struct sockaddr_pn *)addr,
pn->sobject);
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-12 19:00:20 +00:00
return sizeof(struct sockaddr_pn);
}
static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
__poll_t mask = 0;
poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_CLOSE)
return EPOLLERR;
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
mask |= EPOLLPRI;
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
return EPOLLHUP;
if (sk->sk_state == TCP_ESTABLISHED &&
refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
atomic_read(&pn->tx_credits))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
struct sock *sk = sock->sk;
struct pn_sock *pn = pn_sk(sk);
if (cmd == SIOCPNGETOBJECT) {
struct net_device *dev;
u16 handle;
u8 saddr;
if (get_user(handle, (__u16 __user *)arg))
return -EFAULT;
lock_sock(sk);
if (sk->sk_bound_dev_if)
dev = dev_get_by_index(sock_net(sk),
sk->sk_bound_dev_if);
else
dev = phonet_device_get(sock_net(sk));
if (dev && (dev->flags & IFF_UP))
saddr = phonet_address_get(dev, pn_addr(handle));
else
saddr = PN_NO_ADDR;
release_sock(sk);
dev_put(dev);
if (saddr == PN_NO_ADDR)
return -EHOSTUNREACH;
handle = pn_object(saddr, pn_port(pn->sobject));
return put_user(handle, (__u16 __user *)arg);
}
net: ioctl: Use kernel memory on protocol ioctl callbacks Most of the ioctls to net protocols operates directly on userspace argument (arg). Usually doing get_user()/put_user() directly in the ioctl callback. This is not flexible, because it is hard to reuse these functions without passing userspace buffers. Change the "struct proto" ioctls to avoid touching userspace memory and operate on kernel buffers, i.e., all protocol's ioctl callbacks is adapted to operate on a kernel memory other than on userspace (so, no more {put,get}_user() and friends being called in the ioctl callback). This changes the "struct proto" ioctl format in the following way: int (*ioctl)(struct sock *sk, int cmd, - unsigned long arg); + int *karg); (Important to say that this patch does not touch the "struct proto_ops" protocols) So, the "karg" argument, which is passed to the ioctl callback, is a pointer allocated to kernel space memory (inside a function wrapper). This buffer (karg) may contain input argument (copied from userspace in a prep function) and it might return a value/buffer, which is copied back to userspace if necessary. There is not one-size-fits-all format (that is I am using 'may' above), but basically, there are three type of ioctls: 1) Do not read from userspace, returns a result to userspace 2) Read an input parameter from userspace, and does not return anything to userspace 3) Read an input from userspace, and return a buffer to userspace. The default case (1) (where no input parameter is given, and an "int" is returned to userspace) encompasses more than 90% of the cases, but there are two other exceptions. Here is a list of exceptions: * Protocol RAW: * cmd = SIOCGETVIFCNT: * input and output = struct sioc_vif_req * cmd = SIOCGETSGCNT * input and output = struct sioc_sg_req * Explanation: for the SIOCGETVIFCNT case, userspace passes the input argument, which is struct sioc_vif_req. Then the callback populates the struct, which is copied back to userspace. * Protocol RAW6: * cmd = SIOCGETMIFCNT_IN6 * input and output = struct sioc_mif_req6 * cmd = SIOCGETSGCNT_IN6 * input and output = struct sioc_sg_req6 * Protocol PHONET: * cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE * input int (4 bytes) * Nothing is copied back to userspace. For the exception cases, functions sock_sk_ioctl_inout() will copy the userspace input, and copy it back to kernel space. The wrapper that prepare the buffer and put the buffer back to user is sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now calls sk_ioctl(), which will handle all cases. Signed-off-by: Breno Leitao <leitao@debian.org> Reviewed-by: Willem de Bruijn <willemb@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://lore.kernel.org/r/20230609152800.830401-1-leitao@debian.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-06-09 15:27:42 +00:00
return sk_ioctl(sk, cmd, (void __user *)arg);
}
static int pn_socket_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
int err = 0;
if (pn_socket_autobind(sock))
return -ENOBUFS;
lock_sock(sk);
if (sock->state != SS_UNCONNECTED) {
err = -EINVAL;
goto out;
}
if (sk->sk_state != TCP_LISTEN) {
sk->sk_state = TCP_LISTEN;
sk->sk_ack_backlog = 0;
}
sk->sk_max_ack_backlog = backlog;
out:
release_sock(sk);
return err;
}
static int pn_socket_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
{
struct sock *sk = sock->sk;
if (pn_socket_autobind(sock))
return -EAGAIN;
return sk->sk_prot->sendmsg(sk, m, total_len);
}
const struct proto_ops phonet_dgram_ops = {
.family = AF_PHONET,
.owner = THIS_MODULE,
.release = pn_socket_release,
.bind = pn_socket_bind,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pn_socket_getname,
.poll = datagram_poll,
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
};
const struct proto_ops phonet_stream_ops = {
.family = AF_PHONET,
.owner = THIS_MODULE,
.release = pn_socket_release,
.bind = pn_socket_bind,
.connect = pn_socket_connect,
.socketpair = sock_no_socketpair,
.accept = pn_socket_accept,
.getname = pn_socket_getname,
.poll = pn_socket_poll,
.ioctl = pn_socket_ioctl,
.listen = pn_socket_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = pn_socket_sendmsg,
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
};
EXPORT_SYMBOL(phonet_stream_ops);
/* allocate port for a socket */
int pn_sock_get_port(struct sock *sk, unsigned short sport)
{
static int port_cur;
struct net *net = sock_net(sk);
struct pn_sock *pn = pn_sk(sk);
struct sockaddr_pn try_sa;
struct sock *tmpsk;
memset(&try_sa, 0, sizeof(struct sockaddr_pn));
try_sa.spn_family = AF_PHONET;
WARN_ON(!mutex_is_locked(&port_mutex));
if (!sport) {
/* search free port */
int port, pmin, pmax;
phonet_get_local_port_range(&pmin, &pmax);
for (port = pmin; port <= pmax; port++) {
port_cur++;
if (port_cur < pmin || port_cur > pmax)
port_cur = pmin;
pn_sockaddr_set_port(&try_sa, port_cur);
tmpsk = pn_find_sock_by_sa(net, &try_sa);
if (tmpsk == NULL) {
sport = port_cur;
goto found;
} else
sock_put(tmpsk);
}
} else {
/* try to find specific port */
pn_sockaddr_set_port(&try_sa, sport);
tmpsk = pn_find_sock_by_sa(net, &try_sa);
if (tmpsk == NULL)
/* No sock there! We can use that port... */
goto found;
else
sock_put(tmpsk);
}
/* the port must be in use already */
return -EADDRINUSE;
found:
pn->sobject = pn_object(pn_addr(pn->sobject), sport);
return 0;
}
EXPORT_SYMBOL(pn_sock_get_port);
#ifdef CONFIG_PROC_FS
static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
struct hlist_head *hlist = pnsocks.hlist;
struct sock *sknode;
unsigned int h;
for (h = 0; h < PN_HASHSIZE; h++) {
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-28 01:06:00 +00:00
sk_for_each_rcu(sknode, hlist) {
if (!net_eq(net, sock_net(sknode)))
continue;
if (!pos)
return sknode;
pos--;
}
hlist++;
}
return NULL;
}
static struct sock *pn_sock_get_next(struct seq_file *seq, struct sock *sk)
{
struct net *net = seq_file_net(seq);
do
sk = sk_next(sk);
while (sk && !net_eq(net, sock_net(sk)));
return sk;
}
static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(rcu)
{
rcu_read_lock();
return *pos ? pn_sock_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *pn_sock_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock *sk;
if (v == SEQ_START_TOKEN)
sk = pn_sock_get_idx(seq, 0);
else
sk = pn_sock_get_next(seq, v);
(*pos)++;
return sk;
}
static void pn_sock_seq_stop(struct seq_file *seq, void *v)
__releases(rcu)
{
rcu_read_unlock();
}
static int pn_sock_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 127);
if (v == SEQ_START_TOKEN)
seq_puts(seq, "pt loc rem rs st tx_queue rx_queue "
" uid inode ref pointer drops");
else {
struct sock *sk = v;
struct pn_sock *pn = pn_sk(sk);
seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
"%d %pK %u",
sk->sk_protocol, pn->sobject, pn->dobject,
pn->resource, sk->sk_state,
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
atomic_read(&sk->sk_drops));
}
seq_pad(seq, '\n');
return 0;
}
const struct seq_operations pn_sock_seq_ops = {
.start = pn_sock_seq_start,
.next = pn_sock_seq_next,
.stop = pn_sock_seq_stop,
.show = pn_sock_seq_show,
};
#endif
static struct {
struct sock *sk[256];
} pnres;
/*
* Find and hold socket based on resource.
*/
struct sock *pn_find_sock_by_res(struct net *net, u8 res)
{
struct sock *sk;
if (!net_eq(net, &init_net))
return NULL;
rcu_read_lock();
sk = rcu_dereference(pnres.sk[res]);
if (sk)
sock_hold(sk);
rcu_read_unlock();
return sk;
}
static DEFINE_MUTEX(resource_mutex);
int pn_sock_bind_res(struct sock *sk, u8 res)
{
int ret = -EADDRINUSE;
if (!net_eq(sock_net(sk), &init_net))
return -ENOIOCTLCMD;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (pn_socket_autobind(sk->sk_socket))
return -EAGAIN;
mutex_lock(&resource_mutex);
if (pnres.sk[res] == NULL) {
sock_hold(sk);
rcu_assign_pointer(pnres.sk[res], sk);
ret = 0;
}
mutex_unlock(&resource_mutex);
return ret;
}
int pn_sock_unbind_res(struct sock *sk, u8 res)
{
int ret = -ENOENT;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&resource_mutex);
if (pnres.sk[res] == sk) {
RCU_INIT_POINTER(pnres.sk[res], NULL);
ret = 0;
}
mutex_unlock(&resource_mutex);
if (ret == 0) {
synchronize_rcu();
sock_put(sk);
}
return ret;
}
void pn_sock_unbind_all_res(struct sock *sk)
{
unsigned int res, match = 0;
mutex_lock(&resource_mutex);
for (res = 0; res < 256; res++) {
if (pnres.sk[res] == sk) {
RCU_INIT_POINTER(pnres.sk[res], NULL);
match++;
}
}
mutex_unlock(&resource_mutex);
while (match > 0) {
__sock_put(sk);
match--;
}
/* Caller is responsible for RCU sync before final sock_put() */
}
#ifdef CONFIG_PROC_FS
static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
unsigned int i;
if (!net_eq(net, &init_net))
return NULL;
for (i = 0; i < 256; i++) {
if (pnres.sk[i] == NULL)
continue;
if (!pos)
return pnres.sk + i;
pos--;
}
return NULL;
}
static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
{
struct net *net = seq_file_net(seq);
unsigned int i;
BUG_ON(!net_eq(net, &init_net));
for (i = (sk - pnres.sk) + 1; i < 256; i++)
if (pnres.sk[i])
return pnres.sk + i;
return NULL;
}
static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(resource_mutex)
{
mutex_lock(&resource_mutex);
return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock **sk;
if (v == SEQ_START_TOKEN)
sk = pn_res_get_idx(seq, 0);
else
sk = pn_res_get_next(seq, v);
(*pos)++;
return sk;
}
static void pn_res_seq_stop(struct seq_file *seq, void *v)
__releases(resource_mutex)
{
mutex_unlock(&resource_mutex);
}
static int pn_res_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 63);
if (v == SEQ_START_TOKEN)
seq_puts(seq, "rs uid inode");
else {
struct sock **psk = v;
struct sock *sk = *psk;
seq_printf(seq, "%02X %5u %lu",
(int) (psk - pnres.sk),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk));
}
seq_pad(seq, '\n');
return 0;
}
const struct seq_operations pn_res_seq_ops = {
.start = pn_res_seq_start,
.next = pn_res_seq_next,
.stop = pn_res_seq_stop,
.show = pn_res_seq_show,
};
#endif